def __fillOptimizers(self, opName):
    """
    Feed queued jobs to the optimizers of type ``opName``.

    Jobs are popped from the job queue one at a time. For every job an
    optimizer is booked through ``__bookJobInOptimizer``, the job data is
    loaded and the optimization is requested from the booked optimizer.

    The loop stops when:

    * the queue is empty;
    * no optimizer could be booked (the job is pushed back to the head of
      the queue);
    * the optimizer refuses the request (the job is pushed back to the head
      of the queue and the optimizer is reported as disconnected).

    A job whose data cannot be loaded is forgotten and the next one is tried.

    :param opName: optimizer type name, used to select the job queue
    :return: S_OK() in all cases
    """
    gLogger.verbose("Filling %s optimizers" % opName)
    while True:
        # Get first job if any
        jid = self.__jobOpQueue.popJob(opName)
        # None is a singleton: test identity rather than equality
        if jid is None:
            # No more jobs
            gLogger.verbose("No more jobs for optimizers %s" % opName)
            return S_OK()

        result = self.__bookJobInOptimizer(opName, jid)
        if not result['OK']:
            gLogger.info("No empty %s optimizers now" % opName)
            self.__jobOpQueue.pushJob(opName, jid, ahead=True)
            return S_OK()
        opState = result['Value']

        result = self.__jobsState.getJob(jid)
        if not result['OK']:
            gLogger.warn("Could not load job data", "for jid %s" % jid)
            self.__jobsState.forgetJob(jid)
            continue
        job = result['Value']

        result = opState.requestOptimization(job)
        if not result['OK']:
            # Keep the job for a later attempt before dropping the optimizer
            self.__jobOpQueue.pushJob(opName, jid, ahead=True)
            gLogger.warn("Could not request optimization", result['Message'])
            self.optimizerDisconnected(opState.getTrid())
            return S_OK()
def jobexec(jobxml, wfParameters):
    """
    Load a workflow from an XML file, configure it and execute it.

    The JobReport, AccountingReport and Request tools are attached to the
    workflow, then every entry of ``wfParameters`` is set on the workflow
    itself and on each module instance that already knows that parameter.

    :param jobxml: path to the workflow XML file
    :param wfParameters: mapping of parameter name to value
    :return: the value returned by ``workflow.execute()``; the process exits
             with status 1 when the XML file does not exist
    """
    jobfile = os.path.abspath(jobxml)
    if not os.path.exists(jobfile):
        gLogger.warn('Path to specified workflow %s does not exist' % (jobfile))
        sys.exit(1)

    workflow = fromXMLFile(jobfile)
    gLogger.debug(workflow)
    gLogger.debug(workflow.createCode())

    # The job identifier comes from the environment, 0 when it is not set
    jobID = os.environ['JOBID'] if 'JOBID' in os.environ else 0
    gLogger.info('DIRAC JobID %s is running at site %s' % (jobID, DIRAC.siteName()))

    workflow.addTool('JobReport', JobReport(jobID))
    workflow.addTool('AccountingReport', DataStoreClient())
    workflow.addTool('Request', Request())

    # Propagate the command line parameters to the workflow if any
    for name, value in wfParameters.items():
        workflow.setValue(name, value)

    # Propagate the command line parameters to the workflow module instances of each step
    for stepDef in workflow.step_definitions.itervalues():
        for instance in stepDef.module_instances:
            instanceParams = instance.parameters
            for name, value in wfParameters.iteritems():
                if not instanceParams.find(name):
                    continue
                instanceParams.setValue(name, value)

    return workflow.execute()
def execute(self):
    """
    Remove jobs in various status.

    Jobs in 'Deleted' status are removed first. Then, for each status listed
    in ``self.removeStatusDelay``, jobs of the allowed job types in that
    status are removed using a cut-off time of "now minus the configured
    number of days".

    :return: the failing result of the first two steps, S_OK() otherwise
             (failures while removing by final status are only logged)
    """
    # Delete jobs in "Deleted" state
    deleted = self.removeJobsByStatus({'Status': 'Deleted'})
    if not deleted['OK']:
        return deleted

    # Get all the Job types that can be cleaned
    allowed = self.__getAllowedJobTypes()
    if not allowed['OK']:
        return allowed
    jobTypes = allowed['Value']
    if not jobTypes:
        # No jobs in the system subject to removal
        return S_OK()

    # Remove jobs with final status
    for status in self.removeStatusDelay:
        condDict = {'JobType': jobTypes, 'Status': status}
        delay = self.removeStatusDelay[status]
        delTime = str(Time.dateTime() - delay * Time.day)
        removal = self.removeJobsByStatus(condDict, delTime)
        if not removal['OK']:
            gLogger.warn('Failed to remove jobs in status %s' % status)

    return S_OK()
def addUserToEgroup(clip):
    """
    Add user to e-group.

    The WSDL credentials are read from the /Security/egroupAdmin and
    /Security/egroupPass configuration options. When they are missing, or
    when the suds client cannot be created, an error is logged and nothing
    is done.

    :param clip: object providing ``external`` and ``email``; it is also
                 handed to ``getUserInfoFromPhonebook`` for non-external users
    :return: None
    """
    login = gConfig.getValue("/Security/egroupAdmin", "").strip('"')
    pwd = gConfig.getValue("/Security/egroupPass", "").strip('"')
    url = 'https://foundservices.cern.ch/ws/egroups/v1/EgroupsWebService/EgroupsWebService.wsdl'
    if not (login and pwd):
        gLogger.warn("Missing configuration parameters: username or password for WSDL interactions")
        gLogger.warn("Add options: -o /Security/egroupAdmin=<cernusername> -o /Security/egroupPass=<password>")
        gLogger.error("User registration in e-group must be done manually")
        return

    try:
        client = Client(url=url, username=login, password=pwd)
    except suds.transport.TransportError as exc:
        gLogger.error("Failed to get the WSDL client:%s" % exc)
        gLogger.error("User registration in e-group must be done manually")
        return
    except Exception as exc:
        # Catch Exception rather than everything, so that SystemExit and
        # KeyboardInterrupt still propagate, and report what went wrong.
        gLogger.error("Something unexpected happened with the suds client, aborting", repr(exc))
        return

    if clip.external:
        sudsUser = client.factory.create("ns0:MemberType")
        sudsUser['Type'] = 'External'
        sudsUser['Email'] = clip.email
        userl = [sudsUser]
    else:
        userl = [getUserInfoFromPhonebook(client, clip)]

    res = client.service.AddEgroupMembers('ilc-dirac', False, userl)
    if hasattr(res, 'warnings'):
        gLogger.notice(res.warnings)
def __setRSSStorageElementStatus(self, elementName, statusType, status, reason, tokenOwner):
    """
    Set the status of a StorageElement on the RSS.

    The cache lock is held for the whole update and released in every case.
    On success the cache is refreshed; on failure a warning is logged.

    :param elementName: name of the StorageElement
    :param statusType: status type to modify
    :param status: new status value
    :param reason: reason recorded with the change
    :param tokenOwner: owner of the token; the token expires one day from now (UTC)
    :return: result of ``rssClient.modifyStatusElement``
    """
    oneDayFromNow = datetime.datetime.utcnow() + datetime.timedelta(days=1)
    changes = {'name': elementName,
               'statusType': statusType,
               'status': status,
               'reason': reason,
               'tokenOwner': tokenOwner,
               'tokenExpiration': oneDayFromNow}

    self.seCache.acquireLock()
    try:
        res = self.rssClient.modifyStatusElement('Resource', 'Status', **changes)
        if res['OK']:
            self.seCache.refreshCache()
        else:
            _msg = 'Error updating StorageElement (%s,%s,%s)' % (elementName, statusType, status)
            gLogger.warn('RSS: %s' % _msg)
        return res
    finally:
        # Release lock, no matter what.
        self.seCache.releaseLock()
def __generateReleaseNotes(self):
    """
    Produce the compiled release notes and release history.

    When release.notes data is available, the two RST files are generated
    from it and then compiled. Otherwise the existing RST files are compiled
    directly. Problems with an individual file are logged and do not stop
    the processing of the other one.

    :return: the failing result of loading release.notes, S_OK() otherwise
    """
    loaded = self.__loadReleaseNotesFile()
    if not loaded['OK']:
        return loaded
    releaseData = loaded['Value']

    if releaseData:
        gLogger.info("Loaded release.notes")
        rstTargets = (("releasenotes.rst", True), ("releasehistory.rst", False))
        for rstFileName, singleVersion in rstTargets:
            generated = self.__generateRSTFile(releaseData, rstFileName,
                                               self.params.version, singleVersion)
            if not generated['OK']:
                gLogger.error("Could not generate %s: %s" % (rstFileName, generated['Message']))
                continue
            compiled = self.__compileReleaseNotes(rstFileName)
            if not compiled['OK']:
                gLogger.error("Could not compile %s: %s" % (rstFileName, compiled['Message']))
                continue
            gLogger.notice("Compiled %s file!" % rstFileName)
        return S_OK()

    # No release.notes data: fall back to the RST files that may already exist
    gLogger.info("release.notes not found. Trying to find releasenotes.rst")
    for rstFileName in ("releasenotes.rst", "releasehistory.rst"):
        compiled = self.__compileReleaseNotes(rstFileName)
        if compiled['OK']:
            gLogger.notice("Compiled %s file!" % rstFileName)
        else:
            gLogger.warn(compiled['Message'])
    return S_OK()
def export_sendHeartBeat(self, jobID, dynamicData, staticData):
    """
    Send a heart beat sign of life for a job jobID.

    The heart beat data is stored in the job database, then the commands
    pending for the job are fetched, marked as 'Sent' and returned.

    :param jobID: job identifier, converted with ``int()``
    :param dynamicData: dynamic heart beat data passed to ``setHeartBeatData``
    :param staticData: static heart beat data passed to ``setHeartBeatData``
    :return: S_OK( dict of pending job commands ), possibly empty
    """
    # Convert once; the integer form is used by every database call below
    jobIntID = int(jobID)

    result = jobDB.setHeartBeatData(jobIntID, staticData, dynamicData)
    if not result['OK']:
        gLogger.warn('Failed to set the heart beat data for job %d ' % jobIntID)

    # NOTE: restoring the 'Running' status of 'Stalled' / 'Matched' jobs used
    # to be attempted here; that logic is currently disabled.

    jobMessageDict = {}
    result = jobDB.getJobCommand(jobIntID)
    if result['OK']:
        jobMessageDict = result['Value']

    if jobMessageDict:
        for key in jobMessageDict:
            result = jobDB.setJobCommandStatus(jobIntID, key, 'Sent')
            if not result['OK']:
                # Do not fail the heart beat, but leave a trace of the problem
                gLogger.warn('Failed to set job command status to Sent',
                             'job %d, command %s' % (jobIntID, key))

    return S_OK(jobMessageDict)
def _getCatalogConfigDetails(self, catalogName):
    """
    Read the configuration options of a catalog.

    Every option found under ``<rootConfigPath>/<catalogName>`` is read.
    'Status' defaults to 'Active', 'AccessType' is mandatory and 'Master' is
    converted to a boolean that is True only for the string 'True'.

    :param catalogName: name of the catalog section
    :return: S_OK( options dict ) or S_ERROR( message )
    """
    sectionPath = '%s/%s' % (self.rootConfigPath, catalogName)

    # First obtain the options that are available
    res = gConfig.getOptions(sectionPath)
    if not res['OK']:
        errStr = "FileCatalog._getCatalogConfigDetails: Failed to get catalog options."
        gLogger.error(errStr, catalogName)
        return S_ERROR(errStr)

    catalogConfig = dict((option, gConfig.getValue('%s/%s' % (sectionPath, option)))
                         for option in res['Value'])

    # The 'Status' option should be defined (default = 'Active')
    if 'Status' not in catalogConfig:
        warnStr = "FileCatalog._getCatalogConfigDetails: 'Status' option not defined."
        gLogger.warn(warnStr, catalogName)
        catalogConfig['Status'] = 'Active'

    # The 'AccessType' option must be defined
    if 'AccessType' not in catalogConfig:
        errStr = "FileCatalog._getCatalogConfigDetails: Required option 'AccessType' not defined."
        gLogger.error(errStr, catalogName)
        return S_ERROR(errStr)

    # Anything other than 'True' in the 'Master' option means it is not
    isMaster = catalogConfig.get('Master', False) == 'True'
    catalogConfig['Master'] = isMaster
    return S_OK(catalogConfig)
def submitJob(self, jdl):
    """
    Submit one job specified by its JDL to WMS.

    :param jdl: either the path of an existing file containing the JDL, or
                the JDL itself as a string
    :return: S_ERROR when the JDL is invalid, the failing result of the
             input sandbox upload, otherwise the result of the JobManager
             ``submitJob`` call
    """
    if os.path.exists(jdl):
        # The context manager closes the file even if reading fails
        with open(jdl, "r") as jdlFile:
            jdlString = jdlFile.read()
    else:
        # If file JDL does not exist, assume that the JDL is passed as a string
        jdlString = jdl

    # Check the validity of the input JDL
    jdlString = jdlString.strip()
    if not jdlString.startswith("["):
        jdlString = "[%s]" % jdlString
    classAdJob = ClassAd(jdlString)
    if not classAdJob.isOK():
        return S_ERROR('Invalid job JDL')

    # Check the size and the contents of the input sandbox
    result = self.__uploadInputSandbox(classAdJob)
    if not result['OK']:
        return result

    # Submit the job now and get the new job ID
    if not self.jobManager:
        self.jobManager = RPCClient('WorkloadManagement/JobManager',
                                    useCertificates=self.useCertificates,
                                    timeout=self.timeout)
    result = self.jobManager.submitJob(classAdJob.asJDL())
    if 'requireProxyUpload' in result and result['requireProxyUpload']:
        gLogger.warn("Need to upload the proxy")
    return result
def retrieveRepositoryOutputDataLFNs(self, requestedStates=None):
    """
    Helper function.

    Get the list of uploaded output data for a set of jobs in a repository.
    Only jobs whose 'State' is one of ``requestedStates`` and whose
    'UserOutputData' entry is absent or evaluates to 0 are considered.

    :param requestedStates: list of states requested for filtering the list;
                            defaults to ['Done']
    :type requestedStates: list of strings
    :return: list with the 'UploadedOutputData' parameter of every selected
             job; an empty list when no repository is initialised
    """
    # A None default avoids sharing one mutable list between calls
    if requestedStates is None:
        requestedStates = ['Done']

    llist = []
    if not self.jobRepo:
        gLogger.warn("No repository is initialised")
        # Same return type as the normal path, so callers can always iterate
        return llist

    jobs = self.jobRepo.readRepository()['Value']
    for jobID in sortList(jobs.keys()):
        jobDict = jobs[jobID]
        if 'State' not in jobDict or jobDict['State'] not in requestedStates:
            continue
        # Skip jobs flagged with a non-zero UserOutputData entry
        if int(jobDict.get('UserOutputData', 0)):
            continue
        params = self.parameters(int(jobID))
        if params['OK'] and 'UploadedOutputData' in params['Value']:
            llist.append(params['Value']['UploadedOutputData'])
    return llist
def checkLockAge(lockname):
    """
    Check if there is a lock, and in that case deal with it, potentially
    removing it after n minutes.

    While the lock file exists the function waits (``wasteCPUCycles(60)``),
    then inspects the access time of the file. A lock older than 30 minutes
    is cleared. After more than 60 iterations the lock is considered buggy:
    it is cleared and an error is returned.

    :param lockname: path of the lock file
    :return: S_OK( overwrite ) where overwrite is True when an old lock was
             found, or S_ERROR when the lock had to be forcibly removed
    """
    overwrite = False
    count = 0
    while os.path.exists(lockname):
        count += 1
        gLogger.warn("Will wait one minute before proceeding")
        res = wasteCPUCycles(60)
        if res['OK']:
            try:
                last_touch = os.stat(lockname).st_atime
            except OSError as x:
                gLogger.warn("File not available: %s %s, assume removed" % (OSError, str(x)))
                break
            age = time.time() - last_touch
            # this is where I say the file is too old to still be valid (30 minutes)
            if age > 30 * 60:
                gLogger.info("File is %s seconds old" % str(age))
                overwrite = True
                res = clearLock(lockname)
                if res['OK']:
                    break
        # The iteration limit is checked even when the wait itself failed,
        # otherwise a persistently failing wait would loop forever.
        if count > 60:
            # We have been waiting for 60 minutes, something is wrong, kill it
            gLogger.error("Seems file stat is wrong, assume buggy, will fail installation")
            res = clearLock(lockname)
            return S_ERROR("Buggy lock, removed: %s" % res['OK'])

    return S_OK(overwrite)
def getSiteCEMapping(gridName=''):
    """
    Return a dictionary of all sites and their CEs as a list, e.g.
    {'LCG.CERN.ch':['ce101.cern.ch',...]}

    Sites without any CE defined are left out of the mapping.

    :param gridName: when given, the result is restricted to that Grid type
    :return: S_OK( mapping ), or the failing configuration result / S_ERROR
    """
    siteCEMapping = {}

    grids = gConfig.getSections('Resources/Sites/', [])
    if not grids['OK']:
        gLogger.warn('Problem retrieving sections in /Resources/Sites')
        return grids
    gridTypes = grids['Value']

    if gridName:
        if gridName not in gridTypes:
            return S_ERROR('Could not get sections for /Resources/Sites/%s' % gridName)
        gridTypes = [gridName]

    gLogger.debug('Grid Types are: %s' % (', '.join(gridTypes)))
    for grid in gridTypes:
        sites = gConfig.getSections('/Resources/Sites/%s' % grid, [])
        if not sites['OK']:
            gLogger.warn('Problem retrieving /Resources/Sites/%s section' % grid)
            return sites
        for candidate in sites['Value']:
            cePath = '/Resources/Sites/%s/%s/CE' % (grid, candidate)
            candidateCEs = gConfig.getValue(cePath, [])
            if not candidateCEs:
                gLogger.debug('No CEs defined for site %s' % candidate)
                continue
            siteCEMapping[candidate] = candidateCEs

    return S_OK(siteCEMapping)
def __deployResources(self):
    """
    Copy the required files and directories to the appropriate place.

    The ``static/<extDir>`` directory is created under the web application
    path when missing. Each directory listed in ``__extjsDirsToCopy`` is
    copied into it; a destination that already exists is left untouched.
    Each file listed in ``__extjsFilesToCopy`` is copied as well; a failing
    file copy is only logged.

    :return: S_OK(), or S_ERROR when the target directory cannot be created
             or a directory copy fails for a reason other than "exists"
    """
    # Local import so that the symbolic errno constant is available here
    import errno

    extjsDirPath = os.path.join(self.__webAppPath, 'static', self.__extDir)
    if not os.path.exists(extjsDirPath):
        try:
            os.mkdir(extjsDirPath)
        except OSError as e:
            gLogger.error("Can not create release extjs", repr(e))
            return S_ERROR("Can not create release extjs" + repr(e))

    for dirSrc in self.__extjsDirsToCopy:
        dirDst = os.path.join(extjsDirPath, os.path.split(dirSrc)[1])
        try:
            shutil.copytree(dirSrc, dirDst)
        except OSError as e:
            if e.errno != errno.EEXIST:
                errorMsg = "Can not copy %s directory to %s: %s" % (dirSrc, dirDst, repr(e))
                gLogger.error(errorMsg)
                return S_ERROR(errorMsg)
            # An existing destination is not an error: keep what is there
            gLogger.warn("%s directory is already exists. It will be not overwritten!" % dirDst)

    for filePath in self.__extjsFilesToCopy:
        try:
            shutil.copy(filePath, extjsDirPath)
        except (IOError, OSError) as e:
            errorMsg = "Can not copy %s file to %s: %s" % (filePath, extjsDirPath, repr(e))
            gLogger.warn(errorMsg)

    return S_OK()
def _getCatalogConfigDetails(self, catalogName):
    """
    Read the configuration options of a catalog through the resources helper.

    'Status' defaults to 'Active', 'AccessType' is mandatory and 'Master' is
    converted to a boolean that is True only for the string 'True'.

    :param catalogName: name of the catalog
    :return: S_OK( options dict ) or S_ERROR( message )
    """
    # First obtain the options that are available
    result = self.reHelper.getCatalogOptionsDict(catalogName)
    if not result['OK']:
        errStr = "FileCatalog._getCatalogConfigDetails: Failed to get catalog options"
        gLogger.error(errStr, catalogName)
        return S_ERROR(errStr)
    catalogConfig = result['Value']

    # The 'Status' option should be defined (default = 'Active')
    # ('in' replaces dict.has_key(), which only exists in Python 2)
    if 'Status' not in catalogConfig:
        warnStr = "FileCatalog._getCatalogConfigDetails: 'Status' option not defined"
        gLogger.warn(warnStr, catalogName)
        catalogConfig['Status'] = 'Active'

    # The 'AccessType' option must be defined
    if 'AccessType' not in catalogConfig:
        errStr = "FileCatalog._getCatalogConfigDetails: Required option 'AccessType' not defined"
        gLogger.error(errStr, catalogName)
        return S_ERROR(errStr)

    # Anything other than 'True' in the 'Master' option means it is not
    catalogConfig['Master'] = ('Master' in catalogConfig and catalogConfig['Master'] == 'True')
    return S_OK(catalogConfig)
def web_getSelectionData(self):
    """
    Reply with the selection values and the list of plots of an accounting type.

    The accounting type is taken from the "type" request argument. The
    unique key values are fetched in a thread task; a key holding more than
    ``maxRows`` values is truncated to its last ``keptRows`` values. The
    list of reports is taken from the keys cache or, when missing, fetched
    from the ReportsClient and cached for 300 seconds.

    The reply is sent with ``self.finish``; nothing is returned.
    """
    maxRows = 10000   # above this number of values a key is truncated
    keptRows = 5000   # number of (last) values kept for a truncated key

    callback = {}
    typeName = self.request.arguments["type"][0]

    # Get unique key values
    retVal = yield self.threadTask(self.__getUniqueKeyValues, typeName)
    if not retVal['OK']:
        self.finish({"success": "false", "result": "", "error": retVal['Message']})
        return

    records = {}
    for record in retVal['Value']:
        values = retVal['Value'][record]
        length = len(values)
        if length > maxRows:
            # Too many values: do not show all of them in the web portal
            records[record] = values[length - keptRows:]
            # The message reports the number of rows actually returned
            message = "The %s accounting type contains to many rows: %s - > %d. Note: Only %d rows are returned!" % (
                typeName, record, length, keptRows)
            gLogger.warn(message)
        else:
            records[record] = values
    callback["selectionValues"] = records

    # Cache for plotsList?
    data = AccountingHandler.__keysCache.get("reportsList:%s" % typeName)
    if not data:
        repClient = ReportsClient()
        retVal = yield self.threadTask(repClient.listReports, typeName)
        if not retVal['OK']:
            self.finish({"success": "false", "result": "", "error": retVal['Message']})
            return
        data = retVal['Value']
        AccountingHandler.__keysCache.add("reportsList:%s" % typeName, 300, data)
    callback["plotsList"] = data

    self.finish({"success": "true", "result": callback})
def __getClientInitArgs(self, trid, proposalTuple):
    """
    Build the client initialisation arguments for a forwarded connection.

    Connections without an 'x509Chain' credential get S_OK() with no value.
    Otherwise the arguments are taken from the delegated credentials cache
    or, when not cached, a delegation is requested from the peer and the
    resulting arguments are cached for the remaining lifetime of the proxy
    minus one second.

    :param trid: transport identifier in the transport pool
    :param proposalTuple: connection proposal; ``proposalTuple[0][1]`` is
                          used as the setup
    :return: S_OK( arguments dict ), S_OK(), or the failing delegation result
    """
    clientTransport = self._transportPool.get(trid)
    # Get the peer credentials
    credDict = clientTransport.getConnectingCredentials()
    if 'x509Chain' not in credDict:
        return S_OK()

    cKey = (credDict['DN'],
            credDict.get('group', False),
            credDict.get('extraCredentials', False),
            credDict['isLimitedProxy'])
    cachedArgs = self.__delegatedCredentials.get(cKey, 3600)
    idString = self._createIdentityString(credDict, clientTransport)
    if cachedArgs:
        gLogger.verbose("Proxy for %s is cached" % idString)
        return S_OK(cachedArgs)

    delegation = self.__requestDelegation(clientTransport, credDict)
    if not delegation['OK']:
        gLogger.warn("Could not get proxy for %s: %s" % (idString, delegation['Message']))
        return delegation
    delChain = delegation['Value']

    delegatedChain = delChain.dumpAllToString()['Value']
    secsLeft = delChain.getRemainingSecs()['Value'] - 1

    clientInitArgs = {}
    clientInitArgs[BaseClient.KW_SETUP] = proposalTuple[0][1]
    clientInitArgs[BaseClient.KW_TIMEOUT] = 600
    clientInitArgs[BaseClient.KW_IGNORE_GATEWAYS] = True
    clientInitArgs[BaseClient.KW_USE_CERTIFICATES] = False
    clientInitArgs[BaseClient.KW_PROXY_STRING] = delegatedChain
    if BaseClient.KW_EXTRA_CREDENTIALS in credDict:
        clientInitArgs[BaseClient.KW_EXTRA_CREDENTIALS] = credDict[BaseClient.KW_EXTRA_CREDENTIALS]

    gLogger.warn("Got delegated proxy for %s: %s secs left" % (idString, secsLeft))
    self.__delegatedCredentials.add(cKey, secsLeft, clientInitArgs)
    return S_OK(clientInitArgs)
def __addUserDataToConfiguration(self):
    """
    Load the user supplied configuration and select the default section.

    The command line is parsed if that was not done yet, the CFG files are
    loaded and, when configuration servers are known, the remote
    configuration is synchronised. The default section is then chosen from
    the component type and name.

    :return: the failing result of the remote synchronisation; nothing is
             returned explicitly on the other paths visible here
    """
    if not self.isParsed:
        self.__parseCommandLine()

    errorsList = self.__loadCFGFiles()

    if gConfigurationData.getServers():
        retVal = self.syncRemoteConfiguration()
        if not retVal['OK']:
            return retVal
    else:
        gLogger.warn("Running without remote configuration")

    try:
        if self.componentType == "service":
            self.__setDefaultSection(getServiceSection(self.componentName))
        elif self.componentType == "agent":
            self.__setDefaultSection(getAgentSection(self.componentName))
        elif self.componentType == "executor":
            self.__setDefaultSection(getExecutorSection(self.componentName))
        elif self.componentType == "web":
            self.__setDefaultSection("/%s" % self.componentName)
        elif self.componentType == "script":
            if self.componentName and self.componentName[0] == "/":
                # An absolute name is used as the section itself
                self.__setDefaultSection(self.componentName)
                self.componentName = self.componentName[1:]
            else:
                self.__setDefaultSection("/Scripts/%s" % self.componentName)
        else:
            self.__setDefaultSection("/")
    except Exception as e:
        # 'as' form: valid from Python 2.6 on and in Python 3
        # NOTE(review): errorsList is collected but not used further in the
        # code visible here - confirm how callers are expected to see it.
        errorsList.append(str(e))
def _upload(self, pilotDict=None, filename='', pilotScript=''):
    """
    Upload the pilot json file and the pilot scripts to the server.

    When ``pilotDict`` is given it is serialised and uploaded under
    ``self.jsonFile``; otherwise the content of ``pilotScript`` is uploaded
    under ``filename``. Without a configured ``self.pilotFileServer``
    nothing is uploaded (the JSON is printed instead).

    :param pilotDict: dictionary to upload as the pilot JSON file
    :param filename: name under which the script is stored on the server
    :param pilotScript: local path of the script to upload
    :return: S_OK(), or S_ERROR( HTTP status ) when the server does not
             answer with status 200
    """
    if pilotDict:  # this is for the pilot.json file
        if not self.pilotFileServer:
            gLogger.warn("NOT uploading the pilot JSON file, just printing it out")
            # just print here as formatting is important; the call form
            # behaves the same with the Python 2 print statement
            print(json.dumps(pilotDict, indent=4, sort_keys=True))
            return S_OK()
        params = urllib.urlencode({'filename': self.jsonFile,
                                   'data': json.dumps(pilotDict)})
    else:  # we assume the method is asked to upload the pilots scripts
        if not self.pilotFileServer:
            gLogger.warn("NOT uploading %s" % filename)
            return S_OK()
        with open(pilotScript, "rb") as psf:
            script = psf.read()
        params = urllib.urlencode({'filename': filename,
                                   'data': script})

    if ':' in self.pilotFileServer:
        # Split once and reuse both parts: "<host>:<port>"
        serverParts = self.pilotFileServer.split(':')
        con = HTTPDISETConnection(serverParts[0], serverParts[1])
    else:
        con = HTTPDISETConnection(self.pilotFileServer, '443')

    con.request("POST",
                "/DIRAC/upload",
                params,
                {"Content-type": "application/x-www-form-urlencoded",
                 "Accept": "text/plain"})
    resp = con.getresponse()
    if resp.status != 200:
        return S_ERROR(resp.status)
    gLogger.info('-- File and scripts upload done --')
    return S_OK()
def _executeAction(self, trid, proposalTuple, clientInitArgs):
    """
    Forward an action proposed by a client to the target service.

    The action description is received from the client transport and then
    dispatched on the action type: "FileTransfer", "RPC" or a new messaging
    "Connection". Anything else is answered with an error. The result, with
    any 'rpcStub' entry removed, is sent back to the client.

    :param trid: transport identifier in the transport pool
    :param proposalTuple: ((targetService, ...), (actionType, actionMethod), ...)
    :param clientInitArgs: arguments used to create the forwarding client
    :return: the result sent to the client, or None when the action
             description could not be received
    """
    clientTransport = self._transportPool.get(trid)
    credDict = clientTransport.getConnectingCredentials()
    targetService = proposalTuple[0][0]
    actionType, actionMethod = proposalTuple[1][0], proposalTuple[1][1]
    idString = self._createIdentityString(credDict, clientTransport)

    # OOkay! Lets do the magic!
    received = clientTransport.receiveData()
    if not received['OK']:
        gLogger.error("Error while receiving file description", received['Message'])
        clientTransport.sendData(S_ERROR("Error while receiving file description: %s" % received['Message']))
        return

    if actionType == "FileTransfer":
        gLogger.warn("Received a file transfer action from %s" % idString)
        clientTransport.sendData(S_OK("Accepted"))
        retVal = self.__forwardFileTransferCall(targetService, clientInitArgs,
                                                actionMethod, received['Value'],
                                                clientTransport)
    elif actionType == "RPC":
        gLogger.info("Forwarding %s/%s action to %s for %s" % (actionType, actionMethod,
                                                              targetService, idString))
        retVal = self.__forwardRPCCall(targetService, clientInitArgs,
                                       actionMethod, received['Value'])
    elif actionType == "Connection" and actionMethod == "new":
        gLogger.info("Initiating a messaging connection to %s for %s" % (targetService, idString))
        retVal = self._msgForwarder.addClient(trid, targetService,
                                              clientInitArgs, received['Value'])
    else:
        gLogger.warn("Received an invalid %s/%s action from %s" % (actionType, actionMethod, idString))
        retVal = S_ERROR("Unknown type of action (%s)" % actionType)

    # TODO: Send back the data?
    if 'rpcStub' in retVal:
        del retVal['rpcStub']
    clientTransport.sendData(retVal)
    return retVal
def checkDBAccess(cls):
    """
    Try, once, to set up direct access to the job databases.

    JobDB, JobLoggingDB and TaskQueueDB are imported and instantiated in
    turn and a connection is requested from each. As soon as one of them
    cannot be imported, raises a RuntimeError or cannot provide a
    connection, the stored instances are reset and the remaining databases
    are skipped.
    """
    # Init DB if there
    if JobState.__db.checked:
        return
    JobState.__db.checked = True

    dbTargets = (('job', 'JobDB'), ('log', 'JobLoggingDB'), ('tq', 'TaskQueueDB'))
    for varName, dbName in dbTargets:
        try:
            dbImp = "DIRAC.WorkloadManagementSystem.DB.%s" % dbName
            dbMod = __import__(dbImp, fromlist=[dbImp])
            dbInstance = getattr(dbMod, dbName)()
            setattr(JobState.__db, varName, dbInstance)
            connection = dbInstance._getConnection()
            if connection['OK']:
                connection['Value'].close()
            else:
                gLogger.warn("Could not connect to %s (%s). Resorting to RPC" % (dbName, connection['Message']))
                JobState.__db.reset()
                break
        except (RuntimeError, ImportError):
            JobState.__db.reset()
            break
def create(self):
    """
    Create the tarball of a module.

    The parameters and the destination are checked and the source is checked
    out. The tests and docs directories are dropped and the release notes
    are generated (a failure there is only logged). For a Web extension the
    WebAppDIRAC source is checked out when both its version and source are
    given, and the web application is compiled (a failure is only logged).

    :return: the first failing result, otherwise the result of the tarball
             generation
    """
    if not isinstance(self.params, TarModuleCreator.Params):
        return S_ERROR("Argument is not a TarModuleCreator.Params object ")

    # Preparatory steps: each one must succeed before the next is attempted
    for step in (self.params.isOK, self.__checkDestination, self.__checkoutSource):
        result = step()
        if not result['OK']:
            return result

    for subDir in ("tests", "docs"):
        shutil.rmtree("%s/%s" % (self.params.destination, subDir), ignore_errors=True)

    result = self.__generateReleaseNotes()
    if not result['OK']:
        gLogger.error("Won't generate release notes: %s" % result['Message'])

    isWebExtension = 'Web' in self.params.name and self.params.name != 'Web'
    if isWebExtension:
        # if we have an extension, we have to download, because it will be
        # required to compile the code
        if self.params.extensionVersion and self.params.extensionSource:
            result = self.__checkoutSource("WebAppDIRAC",
                                           self.params.extensionSource,
                                           self.params.extensionVersion)
            if not result['OK']:
                return result
        retVal = self.__compileWebApp()
        if not retVal['OK']:
            # it can fail, if we do not have sencha cmd and extjs framework installed
            gLogger.warn('Web is not compiled: %s' % retVal['Message'])

    return self.__generateTarball()
def getSESiteMapping(gridName=''):
    """
    Return a dictionary of all SEs and the list of their associated site(s), e.g.
    {'CERN-RAW':['LCG.CERN.ch'],'CERN-RDST':['LCG.CERN.ch'],...}

    Although normally one site exists for a given SE, it is possible over
    all Grid types to have multiple entries, hence the list values.

    Assumes CS structure of: /Resources/Sites/<GRIDNAME>/<SITENAME>

    :param gridName: when given, the result is restricted to that Grid type
    :return: S_OK( mapping ), or the failing configuration result / S_ERROR
    """
    seSiteMapping = {}

    gridTypes = gConfig.getSections('/Resources/Sites/')
    if not gridTypes['OK']:
        gLogger.warn('Problem retrieving sections in /Resources/Sites')
        return gridTypes
    gridTypes = gridTypes['Value']

    if gridName:
        if gridName not in gridTypes:
            return S_ERROR('Could not get sections for /Resources/Sites/%s' % gridName)
        gridTypes = [gridName]

    gLogger.debug('Grid Types are: %s' % (', '.join(gridTypes)))
    for grid in gridTypes:
        sites = gConfig.getSections('/Resources/Sites/%s' % grid)
        if not sites['OK']:
            gLogger.warn('Problem retrieving /Resources/Sites/%s section' % grid)
            return sites
        # No truthiness test on 'sites': it is a result structure that
        # always carries at least the 'OK' key checked above.
        for candidate in sites['Value']:
            siteSEs = gConfig.getValue('/Resources/Sites/%s/%s/SE' % (grid, candidate), [])
            for se in siteSEs:
                seSiteMapping.setdefault(se, []).append(candidate)

    return S_OK(seSiteMapping)
def export_sendSMS(self, userName, body, fromAddress):
    """
    Send an SMS with supplied body to the specified DIRAC user using the
    Mail utility via an SMS switch.

    The mobile number is read from /Registry/Users/<userName>/Mobile and the
    switch from the SMSSwitch option of the Framework/Notification service
    section; the mail is addressed to <mobile>@<switch>.

    :param userName: DIRAC user name of the recipient
    :param body: text of the message
    :param fromAddress: sender address; the string 'None' means "not set"
    :return: S_ERROR when the mobile number or the switch is not defined,
             otherwise the result of sending the mail
    """
    gLogger.verbose('Received signal to send the following SMS to %s:\n%s' % (userName, body))

    mobile = gConfig.getValue('/Registry/Users/%s/Mobile' % userName, '')
    if not mobile:
        return S_ERROR('No registered mobile number for %s' % userName)

    csSection = PathFinder.getServiceSection('Framework/Notification')
    smsSwitch = gConfig.getValue('%s/SMSSwitch' % csSection, '')
    if not smsSwitch:
        return S_ERROR('No SMS switch is defined in CS path %s/SMSSwitch' % csSection)

    mail = Mail()
    mail._subject = 'DIRAC SMS'
    mail._message = body
    mail._mailAddress = '%s@%s' % (mobile, smsSwitch)
    if fromAddress != 'None':
        mail._fromAddress = fromAddress

    result = mail._send()
    if result['OK']:
        gLogger.info('SMS sent successfully to %s ' % (userName))
        gLogger.debug(result['Value'])
    else:
        gLogger.warn('Could not send SMS to %s with the following message:\n%s' % (userName, result['Message']))
    return result
def checkCAs(self):
    """
    Check the age of the local CRL files and try to update CAs and CRLs.

    The CRL files (``*.r0``) are looked up in the directory given by the
    X509_CERT_DIR environment variable. When the most recently modified one
    is older than two days and the directory is writable, the CAs and CRLs
    are synchronised through the BundleDeliveryClient; synchronisation
    failures are only logged.

    :return: None when X509_CERT_DIR is unset or no CRL file is found,
             S_OK() on every other path
    """
    if "X509_CERT_DIR" not in os.environ:
        gLogger.warn("X509_CERT_DIR is unset. Abort check of CAs")
        return
    caDir = os.environ["X509_CERT_DIR"]

    # In globus standards .r0 files are CRLs. They have the same names of
    # the CAs but different file extension
    searchExp = os.path.join(caDir, "*.r0")
    crlList = glob.glob(searchExp)
    if not crlList:
        gLogger.warn("No CRL files found for %s. Abort check of CAs" % searchExp)
        return

    newestFTime = os.path.getmtime(max(crlList, key=os.path.getmtime))
    twoDaysAgo = time.time() - (2 * 24 * 3600)
    if newestFTime > twoDaysAgo:
        # At least one of the files has been updated in the last 2 days
        return S_OK()

    if not os.access(caDir, os.W_OK):
        gLogger.error("Your CRLs appear to be outdated, but you have no access to update them.")
        # Try to continue anyway...
        return S_OK()

    # Update the CAs & CRLs
    gLogger.notice("Your CRLs appear to be outdated; attempting to update them...")
    bdc = BundleDeliveryClient()
    caSync = bdc.syncCAs()
    if not caSync['OK']:
        gLogger.error("Failed to update CAs", caSync['Message'])
    crlSync = bdc.syncCRLs()
    if not crlSync['OK']:
        gLogger.error("Failed to update CRLs", crlSync['Message'])
    # Continue even if the update failed...
    return S_OK()
def export_putRequest(self, requestJSON):
    """
    Forward a request to the central RequestManager.

    When the RequestManager does not accept the request, it is saved to the
    local request file cache instead.

    :param self: self reference
    :param str requestJSON: request serialised as a JSON string
    :return: S_OK( { "set" : bool, "saved" : bool } ), or the failing result
             of saving the request to the cache
    """
    gMonitor.addMark('reqReceived', 1)

    requestDict = json.loads(requestJSON)
    fallbackName = requestDict.get('RequestName', "***UNKNOWN***")
    requestName = requestDict.get("RequestID", fallbackName)
    gLogger.info("putRequest: got request '%s'" % requestName)

    # A non forwardable request is only reported, it is still sent on
    forwardable = self.__forwardable(requestDict)
    if not forwardable["OK"]:
        gLogger.warn("putRequest: %s" % forwardable["Message"])

    setRequest = self.requestManager().putRequest(requestJSON)
    if setRequest["OK"]:
        gLogger.info("setRequest: request '%s' has been set to the ReqManager" % (requestName))
        return S_OK({"set": True, "saved": False})

    gLogger.error("setReqeuest: unable to set request '%s' @ RequestManager: %s" % (requestName,
                                                                                   setRequest["Message"]))
    # put request to the request file cache
    save = self.__saveRequest(requestName, requestJSON)
    if not save["OK"]:
        gLogger.error("setRequest: unable to save request to the cache: %s" % save["Message"])
        return save
    gLogger.info("setRequest: %s is saved to %s file" % (requestName, save["Value"]))
    return S_OK({"set": False, "saved": True})
def getSiteForCE(computingElement):
    """
    Given a Grid CE name this method returns the DIRAC site name.

    WARNING: if two or more sites happen to have the same ceName/queueName,
    then only the first found is returned.

    :param computingElement: name of the computing element
    :return: S_OK( site name ), S_OK( '' ) when no site declares the CE, or
             the failing configuration result
    """
    gridTypes = gConfig.getSections('/Resources/Sites/', [])
    if not gridTypes['OK']:
        gLogger.warn('Problem retrieving sections in /Resources/Sites')
        return gridTypes

    for grid in gridTypes['Value']:
        sites = gConfig.getSections('/Resources/Sites/%s' % grid, [])
        if not sites['OK']:
            gLogger.warn('Problem retrieving /Resources/Sites/%s section' % grid)
            return sites
        for candidate in sites['Value']:
            siteCEs = gConfig.getValue('/Resources/Sites/%s/%s/CE' % (grid, candidate), [])
            if computingElement in siteCEs:
                # Stop at the very first match: leaving only the inner loop
                # would let a site of a later grid overwrite this one.
                return S_OK(candidate)

    return S_OK('')
def _BySE( self ): """ Matches using TargetSE. This is the standard plugin. """ destSites = set() try: seList = ['Unknown'] if self.params['TargetSE']: if isinstance( self.params['TargetSE'], basestring ): seList = fromChar( self.params['TargetSE'] ) elif isinstance( self.params['TargetSE'], list ): seList = self.params['TargetSE'] except KeyError: pass if not seList or seList == ['Unknown']: return destSites for se in seList: res = getSitesForSE( se ) if not res['OK']: gLogger.warn( "Could not get Sites associated to SE", res['Message'] ) else: thisSESites = res['Value'] if thisSESites: # We make an OR of the possible sites destSites.update( thisSESites ) gLogger.debug( "Destinations: %s" % ','.join ( destSites ) ) return destSites
def clearPilots(self, interval=30, aborted_interval=7):
    """
    Delete all the pilot references submitted before <interval> days, and
    the 'Aborted' ones submitted before <aborted_interval> days.

    Failures of a selection query or of a deletion are only logged.

    :param interval: age in days above which any pilot is selected
    :param aborted_interval: age in days above which 'Aborted' pilots are selected
    :return: S_OK( list of the PilotIDs selected for deletion by both
             queries ), or S_OK( None ) when nothing was selected
    """
    reqList = [
        "SELECT PilotID FROM PilotAgents WHERE SubmissionTime < DATE_SUB(UTC_TIMESTAMP(),INTERVAL %d DAY)" % interval,
        "SELECT PilotID FROM PilotAgents WHERE Status='Aborted' "
        "AND SubmissionTime < DATE_SUB(UTC_TIMESTAMP(),INTERVAL %d DAY)" % aborted_interval,
    ]

    idList = None
    for req in reqList:
        result = self._query(req)
        if not result['OK']:
            gLogger.warn('Error while clearing up pilots')
            continue
        if not result['Value']:
            continue
        selected = [row[0] for row in result['Value']]
        # Accumulate over both queries instead of keeping only the last batch
        if idList is None:
            idList = []
        idList.extend(selected)
        result = self.deletePilots(selected)
        if not result['OK']:
            gLogger.warn('Error while deleting pilots')

    return S_OK(idList)
def setSiteStatus(self, site, status, comment='No comment'):
    """
    Set the status of a site in the 'SiteStatus' table of RSS

    With the RSS flag set, the status is changed through the resource status
    client while the cache lock is held, with a token owned by the user of
    the current proxy and expiring one day from now (UTC). Without the RSS
    flag, the WMSAdministrator service is asked to allow or ban.

    :Parameters:
      **site** - `String`
        the site whose status is changed
      **status** - `String`
        the new status; it is capitalized and must be one of Active,
        Banned, Degraded, Probing, Error, Unknown
      **comment** - `String`
        reason for the change

    :return: S_OK() || S_ERROR()
    """
    if not status:
        return S_ERROR(DErrno.ERESUNK, 'status parameter is empty')

    # fix case sensitive string
    status = status.capitalize()
    if status not in ('Active', 'Banned', 'Degraded', 'Probing', 'Error', 'Unknown'):
        return S_ERROR(errno.EINVAL, 'Not a valid status, parameter rejected')

    if not self.rssFlag:
        # NOTE(review): neither 'site' nor 'comment' is passed to the
        # service calls below - confirm against the WMSAdministrator interface.
        if status in ['Active', 'Degraded']:
            result = RPCClient('WorkloadManagement/WMSAdministrator').allowSite()
        else:
            result = RPCClient('WorkloadManagement/WMSAdministrator').banSite()
        return result

    result = getProxyInfo()
    if not result['OK']:
        return S_ERROR("Unable to get user proxy info %s " % result['Message'])
    tokenOwner = result['Value']['username']
    tokenExpiration = datetime.utcnow() + timedelta(days=1)

    self.rssCache.acquireLock()
    try:
        result = self.rsClient.modifyStatusElement('Site', 'Status',
                                                   status=status,
                                                   name=site,
                                                   tokenExpiration=tokenExpiration,
                                                   reason=comment,
                                                   tokenOwner=tokenOwner)
        if not result['OK']:
            _msg = 'Error updating status of site %s to %s' % (site, status)
            gLogger.warn('RSS: %s' % _msg)
        else:
            self.rssCache.refreshCache()
    finally:
        # Release lock, no matter what.
        self.rssCache.releaseLock()

    return result
def _filterPolicies( decisionParams, policyMatchParams): """ Method that checks if the given policy doesn't meet certain conditions """ #some policies may apply or not also depending on the VO's domain # 'CEAvailabilityPolicy' can be applied only if the CE is inside LCG if 'elementType' in decisionParams and 'name' in decisionParams: elementType = decisionParams['elementType'] name = decisionParams['name'] if elementType and elementType.upper() == 'CE' and 'domain' in policyMatchParams: #WARNING: policyMatchParams['domain'] is a list of domains domains = policyMatchParams['domain'] result = _getComputingElementsByDomainName( targetDomain = domains ) if result['OK']: ces = result['Value'] #to verify that the given CE is in the list of the LCG CEs if name not in ces: gLogger.info( "ComputingElement %s NOT found in domains %s" % ( name, domains ) ) return False else: gLogger.info( "ComputingElement %s found in domains %s" % ( name, domains ) ) else: gLogger.warn( "unable to verify if ComputingElement %s is in domains %s" % ( name, domains ) ) return False return True
def wrapped_fcn(*args, **kwargs):
    """
    Run the wrapped function ``fcn``, optionally under a user proxy.

    The following keyword arguments are consumed here and not forwarded:

    :param str proxyUserName: user name, resolved to a DN when no DN is given
    :param str proxyUserDN: user DN
    :param str proxyUserGroup: user group; substitution needs it plus a name or DN
    :param bool proxyWithVOMS: request a VOMS proxy when the group has a VOMS attribute
    :param proxyFilePath: file path handed to the proxy download calls

    :return: whatever ``fcn`` returns; the failing S_ERROR result when the DN
             lookup or the proxy download fails; S_ERROR describing the
             exception when ``fcn`` raises under a substituted proxy
    """
    userName = kwargs.pop('proxyUserName', '')
    userDN = kwargs.pop('proxyUserDN', '')
    userGroup = kwargs.pop('proxyUserGroup', '')
    vomsFlag = kwargs.pop('proxyWithVOMS', True)
    proxyFilePath = kwargs.pop('proxyFilePath', False)

    if not ((userName or userDN) and userGroup):
        # No proxy substitution requested
        return fcn(*args, **kwargs)

    # Setup user proxy
    originalUserProxy = os.environ.get('X509_USER_PROXY')
    if not userDN:
        result = getDNForUsername(userName)
        if not result['OK']:
            return result
        userDN = result['Value'][0]

    vomsAttr = ''
    if vomsFlag:
        vomsAttr = getVOMSAttributeForGroup(userGroup)

    if vomsAttr:
        result = gProxyManager.downloadVOMSProxyToFile(userDN, userGroup,
                                                       requiredVOMSAttribute=vomsAttr,
                                                       filePath=proxyFilePath,
                                                       requiredTimeLeft=3600,
                                                       cacheTime=3600)
    else:
        result = gProxyManager.downloadProxyToFile(userDN, userGroup,
                                                   filePath=proxyFilePath,
                                                   requiredTimeLeft=3600,
                                                   cacheTime=3600)

    if not result['OK']:
        gLogger.warn("Can't download proxy to file", result['Message'])
        return result

    proxyFile = result['Value']
    os.environ['X509_USER_PROXY'] = proxyFile

    # Check if the caller is executing with the host certificate
    useServerCertificate = gConfig.useServerCertificate()
    if useServerCertificate:
        gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')

    try:
        return fcn(*args, **kwargs)
    except Exception as lException:
        value = ','.join([str(arg) for arg in lException.args])
        exceptType = lException.__class__.__name__
        return S_ERROR("Exception - %s: %s" % (exceptType, value))
    finally:
        # Restore the default host certificate usage if necessary
        if useServerCertificate:
            gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')
        if originalUserProxy:
            os.environ['X509_USER_PROXY'] = originalUserProxy
        else:
            # Use a default: the variable may already have been removed
            # while fcn was running, and a KeyError raised from this
            # finally clause would replace the result being returned.
            os.environ.pop('X509_USER_PROXY', None)
def downloadSandbox(self, sbLocation, destinationDir="", inMemory=False, unpack=True):
    """
    Download a sandbox bundle from the storage element it is stored on.

    :param str sbLocation: sandbox URL of the form ``SB:<SEName>|<SEPFN>``
    :param str destinationDir: directory where the bundle is unpacked;
                               the current working directory when empty
    :param bool inMemory: return the raw content of the bundle instead of unpacking it
    :param bool unpack: when False (and not inMemory) keep the bundle as
                        downloaded and return its path

    :return: S_ERROR on failure, otherwise S_OK with, depending on the mode,
             the bundle content (inMemory), the path of the downloaded
             bundle inside a temporary directory (unpack=False), or the
             summed size of the extracted members
    """
    # Local import: only the temporary directory cleanup needs it
    import shutil

    if sbLocation.find("SB:") != 0:
        return S_ERROR("Invalid sandbox URL")
    sbLocation = sbLocation[3:]
    sbSplit = sbLocation.split("|")
    if len(sbSplit) < 2:
        return S_ERROR("Invalid sandbox URL")
    SEName = sbSplit[0]
    SEPFN = "|".join(sbSplit[1:])

    # If destination dir is not specified use current working dir
    # If its defined ensure the dir structure is there
    if not destinationDir:
        destinationDir = os.getcwd()
    else:
        mkDir(destinationDir)

    try:
        tmpSBDir = tempfile.mkdtemp(prefix="TMSB.")
    except Exception as e:
        return S_ERROR("Cannot create temporal file: %s" % str(e))

    se = StorageElement(SEName, vo=self.__vo)
    result = returnSingleResult(se.getFile(SEPFN, localPath=tmpSBDir))
    if not result['OK']:
        # Do not leave the temporary directory behind on a failed download
        shutil.rmtree(tmpSBDir, ignore_errors=True)
        return result

    sbFileName = os.path.basename(SEPFN)
    tarFileName = os.path.join(tmpSBDir, sbFileName)

    if inMemory:
        try:
            # The bundle is an archive: read it as bytes
            with open(tarFileName, 'rb') as tfile:
                data = tfile.read()
        except Exception as e:
            return S_ERROR('Failed to read the sandbox archive: %s' % str(e))
        finally:
            # Best-effort cleanup that cannot raise on its own
            shutil.rmtree(tmpSBDir, ignore_errors=True)
        return S_OK(data)

    if not unpack:
        # The caller owns the downloaded file (and its temporary directory)
        return S_OK(tarFileName)

    try:
        sandboxSize = 0
        # NOTE(review): members are extracted as-is, so member paths are
        # trusted; confirm that sandbox bundles cannot carry absolute or
        # "../" member names.
        with tarfile.open(name=tarFileName, mode="r") as tf:
            for tarinfo in tf:
                tf.extract(tarinfo, path=destinationDir)
                sandboxSize += tarinfo.size
        result = S_OK(sandboxSize)
    except Exception as e:
        result = S_ERROR("Could not open bundle: %s" % str(e))

    try:
        os.unlink(tarFileName)
        os.rmdir(tmpSBDir)
    except Exception as e:
        gLogger.warn("Could not remove temporary dir %s: %s" % (tmpSBDir, str(e)))

    return result
def getElasticDBParameters(fullname):
    """
    Collect the Elasticsearch connection parameters from the CS.

    Each of Host, Port, User, Password and SSL is looked up in the section
    of the database first and then under /Systems/NoSQLDatabases; when both
    lookups fail a warning is logged and a default is used.

    :param str fullname: database name of the form <System>/<DBname>
    :return: S_OK(dict) with the keys 'Host', 'Port', 'User', 'Password' and 'SSL'
    """
    sectionPath = getDatabaseSection(fullname)

    def _readOption(optionName):
        """ Query the database section, then the common NoSQLDatabases section """
        found = gConfig.getOption(sectionPath + '/' + optionName)
        if not found['OK']:
            found = gConfig.getOption('/Systems/NoSQLDatabases/' + optionName)
        return found

    settings = {}

    # Host, defaulting to localhost
    hostLookup = _readOption('Host')
    if hostLookup['OK']:
        hostName = hostLookup['Value']
    else:
        gLogger.warn("Failed to get the configuration parameter: Host. Using localhost")
        hostName = 'localhost'
    # A host equal to the local fully qualified name is reported as 'localhost'
    if hostName != 'localhost' and socket.getfqdn() == hostName:
        hostName = 'localhost'
    settings['Host'] = hostName

    # Port, defaulting to the Elasticsearch standard one
    portLookup = _readOption('Port')
    if portLookup['OK']:
        portNumber = int(portLookup['Value'])
    else:
        gLogger.warn("Failed to get the configuration parameter: Port. Using 9200")
        portNumber = 9200
    settings['Port'] = portNumber

    # User, None when not configured
    userLookup = _readOption('User')
    if userLookup['OK']:
        userName = userLookup['Value']
    else:
        gLogger.warn("Failed to get the configuration parameter: User. Assuming no user/password is provided/needed")
        userName = None
    settings['User'] = userName

    # Password, None when not configured
    passwordLookup = _readOption('Password')
    if passwordLookup['OK']:
        password = passwordLookup['Value']
    else:
        gLogger.warn("Failed to get the configuration parameter: Password. Assuming no user/password is provided/needed")
        password = None
    settings['Password'] = password

    # SSL is on unless explicitly switched off
    sslLookup = _readOption('SSL')
    if sslLookup['OK']:
        useSSL = sslLookup['Value'].lower() not in ('false', 'no', 'n')
    else:
        gLogger.warn("Failed to get the configuration parameter: SSL. Assuming SSL is needed")
        useSSL = True
    settings['SSL'] = useSSL

    return S_OK(settings)
def execute(self):
    """
    Main execution method.

    For every Active transformation of the configured types, run its input
    meta query against the metadata catalog and add the returned LFNs to the
    transformation. A failure for one transformation is logged and the loop
    moves on to the next one.

    :return: S_OK() always, also when the list of transformations cannot be obtained
    """
    gMonitor.addMark('Iteration', 1)

    # Get all the transformations
    result = self.transClient.getTransformations({'Status': 'Active',
                                                  'Type': self.transformationTypes})
    if not result['OK']:
        gLogger.error("InputDataAgent.execute: Failed to get transformations.", result['Message'])
        return S_OK()

    # Process each transformation
    for transDict in result['Value']:
        transID = long(transDict['TransformationID'])
        # res = self.transClient.getTransformationInputDataQuery( transID )
        res = self.transClient.getTransformationMetaQuery(transID, 'Input')
        if not res['OK']:
            if res['Message'] == 'No InputDataQuery found for transformation':
                gLogger.info("InputDataAgent.execute: No input data query found for transformation %d" % transID)
            else:
                gLogger.error("InputDataAgent.execute: Failed to get input data query for %d" % transID,
                              res['Message'])
            continue
        inputDataQuery = res['Value']

        if self.refreshonly:
            # Determine the correct time stamp to use for this transformation
            if transID in self.timeLog:
                if transID in self.fullTimeLog:
                    # If it is more than a day since the last reduced query, make a full query just in case
                    sinceFullQuery = datetime.datetime.utcnow() - self.fullTimeLog[transID]
                    if sinceFullQuery < datetime.timedelta(seconds=self.fullUpdatePeriod):
                        timeStamp = self.timeLog[transID]
                        if self.dateKey:
                            inputDataQuery[self.dateKey] = (
                                timeStamp - datetime.timedelta(seconds=10)).strftime('%Y-%m-%d %H:%M:%S')
                        else:
                            gLogger.error("DateKey was not set in the CS, cannot use the RefreshOnly")
                    else:
                        self.fullTimeLog[transID] = datetime.datetime.utcnow()
            self.timeLog[transID] = datetime.datetime.utcnow()
            if transID not in self.fullTimeLog:
                self.fullTimeLog[transID] = datetime.datetime.utcnow()

        # Perform the query to the metadata catalog
        gLogger.verbose("Using input data query for transformation %d: %s" % (transID, str(inputDataQuery)))
        start = time.time()
        result = self.metadataClient.findFilesByMetadata(inputDataQuery)
        rtime = time.time() - start
        gLogger.verbose("Metadata catalog query time: %.2f seconds." % (rtime))
        if not result['OK']:
            gLogger.error("InputDataAgent.execute: Failed to get response from the metadata catalog",
                          result['Message'])
            continue
        lfnList = result['Value']

        # Check if the number of files has changed since the last cycle
        nlfns = len(lfnList)
        gLogger.info("%d files returned for transformation %d from the metadata catalog" % (nlfns, int(transID)))
        if nlfns == self.fileLog.get(transID):
            gLogger.verbose('No new files in metadata catalog since last check')
        self.fileLog[transID] = nlfns

        # Add any new files to the transformation
        addedLfns = []
        if lfnList:
            gLogger.verbose('Processing %d lfns for transformation %d' % (len(lfnList), transID))
            # Add the files to the transformation
            gLogger.verbose('Adding %d lfns for transformation %d' % (len(lfnList), transID))
            result = self.transClient.addFilesToTransformation(transID, sorted(lfnList))
            if not result['OK']:
                gLogger.warn("InputDataAgent.execute: failed to add lfns to transformation", result['Message'])
                # Reset the count so that the next cycle does not see "no change"
                self.fileLog[transID] = 0
            else:
                if result['Value']['Failed']:
                    # The failures are those of addFilesToTransformation
                    # ('result'), not of the meta query ('res')
                    for lfn, error in result['Value']['Failed'].items():
                        gLogger.warn("InputDataAgent.execute: Failed to add %s to transformation" % lfn, error)
                if result['Value']['Successful']:
                    for lfn, status in result['Value']['Successful'].items():
                        if status == 'Added':
                            addedLfns.append(lfn)
                    gLogger.info("InputDataAgent.execute: Added %d files to transformation" % len(addedLfns))

    return S_OK()
def removeJobsByStatus(self, condDict, delay=False):
    """
    Remove the jobs matching the given conditions from the WMS databases.

    At most ``self.maxJobsAtOnce`` jobs are handled per call. The jobs are
    first unassigned from their sandboxes and the removal of their oversized
    sandboxes is scheduled; the jobs for which that scheduling failed are
    left alone. The remaining ones are removed from JobDB, TaskQueueDB and
    JobLoggingDB, either one by one (``self.jobByJob``) or in bulk.

    :param dict condDict: selection conditions passed to JobDB.selectJobs
    :param delay: when set, only jobs older than this are selected
    :return: S_OK(), or the failing S_ERROR result of the selection, the
             sandbox unassignment or the oversized sandbox scheduling
    """
    if delay:
        gLogger.verbose("Removing jobs with %s and older than %s day(s)" % (condDict, delay))
        result = self.jobDB.selectJobs(condDict, older=delay, limit=self.maxJobsAtOnce)
    else:
        gLogger.verbose("Removing jobs with %s " % condDict)
        result = self.jobDB.selectJobs(condDict, limit=self.maxJobsAtOnce)

    if not result['OK']:
        return result

    jobList = result['Value']
    if len(jobList) > self.maxJobsAtOnce:
        jobList = jobList[:self.maxJobsAtOnce]
    if not jobList:
        return S_OK()

    self.log.notice("Deleting %s jobs for %s" % (len(jobList), condDict))

    count = 0
    error_count = 0
    result = SandboxStoreClient(useCertificates=True).unassignJobs(jobList)
    if not result['OK']:
        gLogger.error("Cannot unassign jobs to sandboxes", result['Message'])
        return result

    result = self.deleteJobOversizedSandbox(jobList)
    if not result['OK']:
        gLogger.error("Cannot schedule removal of oversized sandboxes", result['Message'])
        return result

    # Keep the jobs whose sandbox removal could not be scheduled
    failedJobs = result['Value']['Failed']
    jobList = [job for job in jobList if job not in failedJobs]

    # TODO: we should not remove a job if it still has requests in the RequestManager.
    # But this logic should go in the client or in the service, and right now no service expose jobDB.removeJobFromDB

    if self.jobByJob:
        for jobID in jobList:
            resultJobDB = self.jobDB.removeJobFromDB(jobID)
            resultTQ = self.taskQueueDB.deleteJob(jobID)
            resultLogDB = self.jobLoggingDB.deleteJob(jobID)
            errorFlag = False
            # Each warning reports the message of the call that failed
            if not resultJobDB['OK']:
                gLogger.warn('Failed to remove job %d from JobDB' % jobID, resultJobDB['Message'])
                errorFlag = True
            if not resultTQ['OK']:
                gLogger.warn('Failed to remove job %d from TaskQueueDB' % jobID, resultTQ['Message'])
                errorFlag = True
            if not resultLogDB['OK']:
                gLogger.warn('Failed to remove job %d from JobLoggingDB' % jobID, resultLogDB['Message'])
                errorFlag = True
            if errorFlag:
                error_count += 1
            else:
                count += 1
            if self.throttlingPeriod:
                time.sleep(self.throttlingPeriod)
    else:
        result = self.jobDB.removeJobFromDB(jobList)
        if not result['OK']:
            gLogger.error('Failed to delete %d jobs from JobDB' % len(jobList))
        else:
            gLogger.info('Deleted %d jobs from JobDB' % len(jobList))

        for jobID in jobList:
            resultTQ = self.taskQueueDB.deleteJob(jobID)
            if not resultTQ['OK']:
                gLogger.warn('Failed to remove job %d from TaskQueueDB' % jobID, resultTQ['Message'])
                error_count += 1
            else:
                count += 1

        result = self.jobLoggingDB.deleteJob(jobList)
        if not result['OK']:
            gLogger.error('Failed to delete %d jobs from JobLoggingDB' % len(jobList))
        else:
            gLogger.info('Deleted %d jobs from JobLoggingDB' % len(jobList))

    if count > 0 or error_count > 0:
        gLogger.info('Deleted %d jobs from JobDB, %d errors' % (count, error_count))
    return S_OK()
def __uploadInputSandbox(self, classAdJob, jobDescriptionObject=None):
    """
    Validate the Input Sandbox of the job and upload its local part.

    Entries that are LFNs, sandbox references or parameter placeholders are
    left untouched. The other entries must exist on disk; together with the
    optional in-memory job description they are uploaded as a sandbox, whose
    reference is appended to the "InputSandbox" attribute of the job.

    :param classAdJob: ClassAd job description, updated in place
    :param jobDescriptionObject: optional StringIO object shipped with the sandbox
    :return: S_OK(), or S_ERROR; when files are missing the error carries
             the 'BadFile' and 'TotalSize' keys
    """
    sandboxEntries = self.__getInputSandboxEntries(classAdJob)

    # Everything not carrying one of these prefixes is expected on disk
    remotePrefixes = ('lfn:', 'LFN:', 'SB:', '%s', '%(')
    localEntries = [entry for entry in sandboxEntries if not entry.startswith(remotePrefixes)]

    inMemoryFiles = []
    inMemorySize = 0
    if jobDescriptionObject is not None:
        if not isinstance(jobDescriptionObject, StringIO.StringIO):
            return S_ERROR(EWMSJDL, "jobDescriptionObject is not a StringIO object")
        inMemoryFiles = [jobDescriptionObject]
        inMemorySize = len(jobDescriptionObject.buf)
        gLogger.debug("Size of the stringIOFiles: " + str(inMemorySize))

    # Check real files: we are passing in real files, we expect them to be on disk
    missingEntries = []
    foundEntries = []
    for entry in localEntries:
        if os.path.exists(entry):
            foundEntries.append(entry)
        else:
            missingEntries.append(entry)
            gLogger.warn("inputSandbox file/directory " + entry + " not found. Keep looking for the others")

    onDiskSize = File.getGlobbedTotalSize(foundEntries)
    gLogger.debug("Size of the diskFiles: " + str(onDiskSize))
    overallSize = onDiskSize + inMemorySize
    gLogger.verbose("Total size of the inputSandbox: " + str(overallSize))

    if missingEntries:
        failure = S_ERROR(EWMSJDL, 'Input Sandbox is not valid')
        failure['BadFile'] = missingEntries
        failure['TotalSize'] = overallSize
        return failure

    uploadList = inMemoryFiles + foundEntries
    if uploadList:
        if not self.sandboxClient:
            self.sandboxClient = SandboxStoreClient(useCertificates=self.useCertificates)
        uploaded = self.sandboxClient.uploadFilesAsSandbox(uploadList)
        if not uploaded['OK']:
            return uploaded
        sandboxEntries.append(uploaded['Value'])
        classAdJob.insertAttributeVectorString("InputSandbox", sandboxEntries)

    return S_OK()
def submitJob(self, jdl, jobDescriptionObject=None):
    """
    Submit one job, described by its JDL, to the WMS.

    The JDL may describe a parametric job, in which case several DIRAC jobs
    are created; their bulk submission is then confirmed with the service.

    :param str jdl: path of a JDL file or, when no such file exists, the JDL itself
    :param jobDescriptionObject: optional StringIO object added to the input sandbox
    :return: the result of the JobManager submission (or of the bulk
             confirmation for parametric jobs), or S_ERROR
    """
    if os.path.exists(jdl):
        with open(jdl, "r") as jdlFile:
            jdlText = jdlFile.read()
    else:
        # If file JDL does not exist, assume that the JDL is passed as a string
        jdlText = jdl

    # Drop the comment lines of the JDL
    jdlText = '\n'.join([jdlLine for jdlLine in jdlText.strip().split('\n')
                         if not jdlLine.strip().startswith('#')])

    # Check the validity of the input JDL
    if not jdlText.startswith("["):
        jdlText = "[%s]" % jdlText
    classAdJob = ClassAd(jdlText)
    if not classAdJob.isOK():
        return S_ERROR(EWMSJDL, 'Invalid job JDL')

    # Check the size and the contents of the input sandbox
    result = self.__uploadInputSandbox(classAdJob, jobDescriptionObject)
    if not result['OK']:
        return result

    # Submit the job now and get the new job ID
    result = getParameterVectorLength(classAdJob)
    if not result['OK']:
        return result
    nJobs = result['Value']
    isParametric = nJobs > 0

    if not self.jobManager:
        self.jobManager = RPCClient('WorkloadManagement/JobManager',
                                    useCertificates=self.useCertificates,
                                    timeout=self.timeout)

    result = self.jobManager.submitJob(classAdJob.asJDL())

    if isParametric:
        gLogger.debug('Applying transactional job submission')
        # The server applies transactional bulk submission, we should confirm the jobs
        if result['OK']:
            submittedIDs = result['Value']
            if len(submittedIDs) != nJobs:
                return S_ERROR(EWMSSUBM, "The number of submitted jobs does not match job description")

            # Confirm the submitted jobs, up to three attempts
            for _attempt in xrange(3):
                result = self.jobManager.confirmBulkSubmission(submittedIDs)
                if result['OK']:
                    break
                time.sleep(1)
            else:
                # The bulk submission failed, try to delete the created jobs
                deletion = self.jobManager.deleteJob(submittedIDs)
                error = "Job submission failed to confirm bulk transaction"
                if not deletion['OK']:
                    error += "; removal of created jobs failed"
                return S_ERROR(EWMSSUBM, error)

    if result.get('requireProxyUpload'):
        gLogger.warn("Need to upload the proxy")

    return result
def __kill_delete_jobs(self, jobIDList, right):
    """
    Kill or delete the given jobs, depending on their current status.

    Running, Matched and Stalled jobs are killed; Done, Failed and Killed
    jobs are deleted unless only the kill right is being exercised; jobs in
    any other status are marked as killed without sending a kill command.
    Stager tasks of Staging jobs are killed as well.

    :param jobIDList: job specification, resolved with __getJobList
    :param right: right against which the job policy is evaluated
    :return: S_OK(valid job IDs) carrying 'requireProxyUpload' and, when
             relevant, 'InvalidJobIDs'; S_ERROR carrying 'NonauthorizedJobIDs'
             and/or 'FailedJobIDs' when some jobs could not be treated
    """
    jobList = self.__getJobList(jobIDList)
    if not jobList:
        return S_ERROR('Invalid job specification: ' + str(jobIDList))

    validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right)

    # Get job status to see what is to be killed or deleted
    result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
    if not result['OK']:
        return result

    toKill = []
    toDelete = []
    toMarkKilled = []
    inStaging = []
    for jobID, attributes in result['Value'].items():
        currentStatus = attributes['Status']
        if currentStatus in ('Running', 'Matched', 'Stalled'):
            toKill.append(jobID)
        elif currentStatus in ('Done', 'Failed', 'Killed'):
            if right != RIGHT_KILL:
                toDelete.append(jobID)
        else:
            toMarkKilled.append(jobID)
        if currentStatus == 'Staging':
            inStaging.append(jobID)

    # Same order as the lists are declared in: mark killed, kill, delete
    badIDs = [jobID for jobID in toMarkKilled
              if not self.__killJob(jobID, sendKillCommand=False)['OK']]
    badIDs.extend([jobID for jobID in toKill if not self.__killJob(jobID)['OK']])
    badIDs.extend([jobID for jobID in toDelete if not self.__deleteJob(jobID)['OK']])

    if inStaging:
        stagerClient = StorageManagerClient()
        gLogger.info('Going to send killing signal to stager as well!')
        result = stagerClient.killTasksBySourceTaskID(inStaging)
        if not result['OK']:
            gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message'])

    if nonauthJobList or badIDs:
        result = S_ERROR('Some jobs failed deletion')
        if nonauthJobList:
            gLogger.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList))
            result['NonauthorizedJobIDs'] = nonauthJobList
        if badIDs:
            gLogger.warn("JobIDs failed to be deleted", str(badIDs))
            result['FailedJobIDs'] = badIDs
        return result

    result = S_OK(validJobList)
    result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    if invalidJobList:
        result['InvalidJobIDs'] = invalidJobList

    return result
def getSiteSEMapping(self):
    """
    Build, cache and return the mappings between sites and storage elements.

    The returned dictionary has one entry per mapping kind, each of them
    being a dictionary {site: set of SEs}:

      * LOCAL: the SEs declared for the site in /Resources/Sites, plus the
        SEs declaring one of them as BaseSE or Alias
      * PROTOCOL: from the SiteSEMappingByProtocol Operations section
      * DOWNLOAD: from the SiteSEMappingByDownload Operations section or,
        when absent, from SiteLocalSEMapping (old convention)

    As side effects ``self.siteSet`` and ``self.storageElementSet`` are
    filled. The cache is only published once everything has been retrieved,
    so that a failed call is retried from scratch by the next one.

    :return: S_OK(siteSEMapping), or the failing S_ERROR result of a CS query
    """
    if self.siteSEMapping:
        return S_OK(self.siteSEMapping)

    # Get the list of SEs and keep a mapping of those using an Alias or a BaseSE
    storageElements = gConfig.getSections('Resources/StorageElements')
    if not storageElements['OK']:
        gLogger.warn('Problem retrieving storage elements', storageElements['Message'])
        return storageElements
    storageElements = storageElements['Value']
    equivalentSEs = {}
    for se in storageElements:
        for option in ('BaseSE', 'Alias'):
            originalSE = gConfig.getValue('Resources/StorageElements/%s/%s' % (se, option))
            if originalSE:
                equivalentSEs.setdefault(originalSE, []).append(se)
                break

    siteSEMapping = {}
    gridTypes = gConfig.getSections('Resources/Sites/')
    if not gridTypes['OK']:
        gLogger.warn('Problem retrieving sections in /Resources/Sites', gridTypes['Message'])
        return gridTypes

    gridTypes = gridTypes['Value']

    gLogger.debug('Grid Types are: %s' % (', '.join(gridTypes)))
    # Get a list of sites and their local SEs
    siteSet = set()
    storageElementSet = set()
    siteSEMapping[LOCAL] = {}
    for grid in gridTypes:
        result = gConfig.getSections('/Resources/Sites/%s' % grid)
        if not result['OK']:
            gLogger.warn('Problem retrieving /Resources/Sites/%s section' % grid)
            return result
        sites = result['Value']
        siteSet.update(sites)
        for site in sites:
            candidateSEs = gConfig.getValue('/Resources/Sites/%s/%s/SE' % (grid, site), [])
            if candidateSEs:
                candidateSEs += [eqSE for se in candidateSEs for eqSE in equivalentSEs.get(se, [])]
                siteSEMapping[LOCAL].setdefault(site, set()).update(candidateSEs)
                storageElementSet.update(candidateSEs)

    def _mappingFromOperations(cfgLocalSEPath, sites):
        """ Resolve, for each site of an Operations section, its SE groups and site names into SEs """
        mapping = {}
        for site in sites:
            candidates = set(self.__opsHelper.getValue(cfgPath(cfgLocalSEPath, site), []))
            ses = set(resolveSEGroup(candidates - siteSet)) | (candidates & siteSet)
            # If a candidate is a site, then all local SEs are eligible
            for candidate in ses & siteSet:
                ses.remove(candidate)
                # A site without any local SE has no LOCAL entry: it brings nothing
                ses.update(siteSEMapping[LOCAL].get(candidate, set()))
            mapping.setdefault(site, set()).update(ses)
        return mapping

    # Add Sites from the SiteSEMappingByProtocol in the CS
    siteSEMapping[PROTOCOL] = {}
    cfgLocalSEPath = cfgPath('SiteSEMappingByProtocol')
    result = self.__opsHelper.getOptionsDict(cfgLocalSEPath)
    if result['OK']:
        siteSEMapping[PROTOCOL] = _mappingFromOperations(cfgLocalSEPath, result['Value'])

    # Add Sites from the SiteSEMappingByDownload in the CS, else SiteLocalSEMapping (old convention)
    siteSEMapping[DOWNLOAD] = {}
    cfgLocalSEPath = cfgPath('SiteSEMappingByDownload')
    result = self.__opsHelper.getOptionsDict(cfgLocalSEPath)
    if not result['OK']:
        cfgLocalSEPath = cfgPath('SiteLocalSEMapping')
        result = self.__opsHelper.getOptionsDict(cfgLocalSEPath)
    if result['OK']:
        siteSEMapping[DOWNLOAD] = _mappingFromOperations(cfgLocalSEPath, result['Value'])

    # Add storage elements that may not be associated with a site
    result = gConfig.getSections('/Resources/StorageElements')
    if not result['OK']:
        gLogger.warn('Problem retrieving /Resources/StorageElements section', result['Message'])
        return result

    # Publish the three attributes together, once nothing can fail anymore
    self.siteSEMapping = siteSEMapping
    self.storageElementSet = storageElementSet | set(result['Value'])
    self.siteSet = siteSet
    return S_OK(siteSEMapping)
def submitJob(self, executableFile, proxy, numberOfJobs=1, processors=1):
    """
    Submit pilot jobs to the ARC computing element.

    The jobs are submitted one at a time; the first failure stops the
    submission and whatever was submitted so far is returned.

    :param str executableFile: path of the executable to submit
    :param proxy: not used directly here; the proxy is set up by _prepareProxy
    :param int numberOfJobs: number of jobs to submit
    :param int processors: number of processors, passed to the XRSL writer
    :return: S_OK(list of job references) carrying 'PilotStampDict', or
             S_ERROR when the proxy could not be set up or no job was submitted
    """
    # Assume that the ARC queues are always of the format nordugrid-<batchSystem>-<queue>
    # And none of our supported batch systems have a "-" in their name
    self.arcQueue = self.queue.split("-", 2)[2]
    result = self._prepareProxy()
    if not result['OK']:
        gLogger.error('ARCComputingElement: failed to set up proxy', result['Message'])
        return result
    self.usercfg.ProxyPath(os.environ['X509_USER_PROXY'])

    gLogger.verbose("Executable file path: %s" % executableFile)
    # 5 == read + execute permission
    if not os.access(executableFile, 5):
        executableMode = (stat.S_IRWXU |
                          stat.S_IRGRP | stat.S_IXGRP |
                          stat.S_IROTH | stat.S_IXOTH)
        os.chmod(executableFile, executableMode)

    # Diagnostics logged for each flag set in a failed submission status,
    # in this order; the flag is an attribute name of arc.SubmissionStatus
    failureHints = (
        ('NOT_IMPLEMENTED', "%s feature not implemented on CE? (weird I know - complain to site admins"),
        ('NO_SERVICES', "%s no services are running on CE? (open GGUS ticket to site admins"),
        ('ENDPOINT_NOT_QUERIED', "%s endpoint was not even queried. (network ..?)"),
        ('BROKER_PLUGIN_NOT_LOADED', "%s BROKER_PLUGIN_NOT_LOADED : ARC library installation problem?"),
        ('DESCRIPTION_NOT_SUBMITTED',
         "%s Job not submitted - incorrect job description? (missing field in XRSL string?)"),
        ('SUBMITTER_PLUGIN_NOT_LOADED', "%s SUBMITTER_PLUGIN_NOT_LOADED : ARC library installation problem?"),
        ('AUTHENTICATION_ERROR',
         "%s authentication error - screwed up / expired proxy? Renew / upload pilot proxy on machine?"),
        ('ERROR_FROM_ENDPOINT', "%s some error from the CE - possibly CE problems?"),
    )

    submittedIDs = []
    stamps = {}
    endpoint = arc.Endpoint(self.ceHost + ":2811/jobs", arc.Endpoint.JOBSUBMIT,
                            "org.nordugrid.gridftpjob")

    # Submit jobs iteratively for now. Tentatively easier than mucking around with the JobSupervisor class
    for _ in range(numberOfJobs):
        # The basic job description
        jobdescs = arc.JobDescriptionList()
        # Get the job into the ARC way
        xrslString, diracStamp = self.__writeXRSL(executableFile, processors)
        gLogger.debug("XRSL string submitted : %s" % xrslString)
        gLogger.debug("DIRAC stamp for job : %s" % diracStamp)
        if not arc.JobDescription_Parse(xrslString, jobdescs):
            gLogger.error("Invalid job description")
            break
        # Submit the job
        jobs = arc.JobList()  # filled by the submit process
        submitter = arc.Submitter(self.usercfg)
        status = submitter.Submit(endpoint, jobdescs, jobs)

        if status != arc.SubmissionStatus.NONE:
            # Submission failed: explain as much as possible and give up
            message = "Failed to submit job because "
            for flagName, hint in failureHints:
                if status.isSet(getattr(arc.SubmissionStatus, flagName)):
                    gLogger.warn(hint % message)
            gLogger.warn("%s ... maybe above messages will give a hint." % message)
            break

        # Job successfully submitted
        pilotJobReference = jobs[0].JobID
        submittedIDs.append(pilotJobReference)
        stamps[pilotJobReference] = diracStamp
        gLogger.debug("Successfully submitted job %s to CE %s" % (pilotJobReference, self.ceHost))

    if not submittedIDs:
        return S_ERROR('No pilot references obtained from the ARC job submission')

    result = S_OK(submittedIDs)
    result['PilotStampDict'] = stamps
    return result
def setSiteStatus(self, site, status, comment='No comment'):
    """
    Set the status of a site, either in the 'SiteStatus' table of RSS or,
    when RSS is not in use (``self.rssFlag`` is false), through the
    WMSAdministrator service.

    examples
      >>> siteStatus.setSiteStatus( 'site1.test.test', 'Banned' )
          S_OK()
      >>> siteStatus.setSiteStatus( 'site1.test.test', None )
          S_ERROR( ... )

    :Parameters:
      **site** - `String`
        the site whose status is going to be changed
      **status** - `String`
        the new status, case-insensitive; one of Active, Banned, Degraded,
        Probing, Error, Unknown
      **comment** - `String`
        reason for the status change

    :return: S_OK() || S_ERROR()
    """
    if not status:
        return S_ERROR(DErrno.ERESUNK, 'status parameter is empty')

    # fix case sensitive string
    status = status.capitalize()
    allowedStateList = ['Active', 'Banned', 'Degraded', 'Probing', 'Error', 'Unknown']
    if status not in allowedStateList:
        return S_ERROR(errno.EINVAL, 'Not a valid status, parameter rejected')

    if self.rssFlag:
        result = getProxyInfo()
        if not result['OK']:
            return S_ERROR("Unable to get user proxy info %s " % result['Message'])
        tokenOwner = result['Value']['username']
        tokenExpiration = datetime.utcnow() + timedelta(days=1)

        self.rssCache.acquireLock()
        try:
            result = self.rsClient.modifyStatusElement('Site', 'Status',
                                                       status=status,
                                                       name=site,
                                                       tokenExpiration=tokenExpiration,
                                                       reason=comment,
                                                       tokenOwner=tokenOwner)
            if result['OK']:
                self.rssCache.refreshCache()
            else:
                _msg = 'Error updating status of site %s to %s' % (site, status)
                gLogger.warn('RSS: %s' % _msg)
        # Release lock, no matter what.
        finally:
            self.rssCache.releaseLock()
    else:
        # The site and the reason must be forwarded: without them the
        # service has no way to know which site to allow or ban.
        wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
        if status in ['Active', 'Degraded']:
            result = wmsAdmin.allowSite(site, comment)
        else:
            result = wmsAdmin.banSite(site, comment)

    return result
def getCPUTime(cpuNormalizationFactor):
    """
    Try to get the CPU time left for execution (in seconds).

    The work left is first taken from /LocalSite/CPUTimeLeft or, failing
    that, from the batch system through the TimeLeft utility; it is then
    converted to real seconds with the CPU normalization factor.
    When no value can be obtained that way, the maxCPUTime declared in the
    CS for the queue is used instead: the one of the local queue when
    /LocalSite/CEQueue is set, otherwise the smallest one among the queues
    of the local CE. If that fails too, the default is returned, which is a
    large 9999999 that may be considered as "infinite".

    This is a generic method, independent from the middleware of the
    resource if TimeLeft doesn't return a value.

    args:
      cpuNormalizationFactor (float): the CPU power of the current Worker Node.
        If 0 or not set, it is taken from the local configuration

    returns:
      cpuTimeLeft (int): the CPU time left, in seconds

    raises:
      RuntimeError: when the queues section of the local CE cannot be read from the CS
    """
    defaultCPUTime = 9999999.0

    cpuTimeLeft = 0.0
    cpuWorkLeft = gConfig.getValue("/LocalSite/CPUTimeLeft", 0)

    if not cpuWorkLeft:
        # Try and get the information from the CPU left utility
        result = TimeLeft().getTimeLeft()
        if result["OK"]:
            cpuWorkLeft = result["Value"]

    if cpuWorkLeft > 0:
        # This is in HS06 seconds
        # We need to convert in real seconds
        if not cpuNormalizationFactor:
            # if cpuNormalizationFactor passed in is 0, try get it from the local cfg
            cpuNormalizationFactor = gConfig.getValue("/LocalSite/CPUNormalizationFactor", 0.0)
        if cpuNormalizationFactor:
            cpuTimeLeft = cpuWorkLeft / cpuNormalizationFactor

    if not cpuTimeLeft:
        # now we know that we have to find the CPUTimeLeft by looking in the CS
        # this is not granted to be correct as the CS units may not be real seconds
        gridCE = gConfig.getValue("/LocalSite/GridCE")
        ceQueue = gConfig.getValue("/LocalSite/CEQueue")
        if not ceQueue:
            # we have to look for a ceQueue in the CS
            # A bit hacky. We should better profit from something generic
            gLogger.warn("No CEQueue in local configuration, looking to find one in CS")
            siteName = DIRAC.siteName()
            queueSection = "/Resources/Sites/%s/%s/CEs/%s/Queues" % (siteName.split(".")[0], siteName, gridCE)
            res = gConfig.getSections(queueSection)
            if not res["OK"]:
                raise RuntimeError(res["Message"])
            queues = res["Value"]
            cpuTimes = [gConfig.getValue(queueSection + "/" + queue + "/maxCPUTime", defaultCPUTime)
                        for queue in queues]
            if cpuTimes:
                # These are (real, wall clock) minutes - damn BDII!
                cpuTimeLeft = min(cpuTimes) * 60
            else:
                # min() of an empty list raises ValueError: with no queue
                # declared, fall back to the documented default instead
                cpuTimeLeft = defaultCPUTime
                gLogger.warn("No queues found in %s, defaulting CPUTime to %d" % (queueSection, cpuTimeLeft))
        else:
            queueInfo = getQueueInfo("%s/%s" % (gridCE, ceQueue))
            cpuTimeLeft = defaultCPUTime
            if not queueInfo["OK"] or not queueInfo["Value"]:
                gLogger.warn("Can't find a CE/queue, defaulting CPUTime to %d" % cpuTimeLeft)
            else:
                queueCSSection = queueInfo["Value"]["QueueCSSection"]
                # These are (real, wall clock) minutes - damn BDII!
                cpuTimeInMinutes = gConfig.getValue("%s/maxCPUTime" % queueCSSection, 0.0)
                if cpuTimeInMinutes:
                    cpuTimeLeft = cpuTimeInMinutes * 60.0
                    gLogger.info("CPUTime for %s: %f" % (queueCSSection, cpuTimeLeft))
                else:
                    gLogger.warn("Can't find maxCPUTime for %s, defaulting CPUTime to %f" %
                                 (queueCSSection, cpuTimeLeft))

    return int(cpuTimeLeft)
def execute(arguments):
    """Drive one job through the whole job wrapper sequence.

    The phases are, in order: working directory set-up, JobWrapper
    initialization, input sandbox download, input data resolution, payload
    execution, output processing and finalization.

    :param dict arguments: job description; ``arguments['Job']`` holds at
        least ``JobID`` and optionally ``InputSandbox``, ``InputData``,
        ``OutputSandbox`` and ``OutputData``; ``WorkingDirectory`` is optional
    :returns: 1 when a phase up to and including execution fails, 2 when
        output processing fails or finalization raises, otherwise whatever
        ``job.finalize()`` returns
    """
    global gJobReport

    jobID = arguments['Job']['JobID']
    os.environ['JOBID'] = jobID
    jobID = int(jobID)

    def _rescheduleAndAccount(reason):
        """Reschedule the job for `reason` and send the matching accounting."""
        rescheduled = rescheduleFailedJob(jobID, reason, gJobReport)
        job.sendJobAccounting(rescheduled, reason)

    def _failJob(minorStatus, error):
        """Record `error`, flag the job Failed and send the failover request."""
        gJobReport.setJobParameter('Error Message', str(error), sendFlag=False)
        gJobReport.setJobStatus('Failed', minorStatus, sendFlag=False)
        job.sendFailoverRequest('Failed', minorStatus)

    # --- working directory -------------------------------------------------
    if 'WorkingDirectory' in arguments:
        workDir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(workDir):
            os.chdir(workDir)
        else:
            try:
                # this will raise an exception if workDir already exists (which is ~OK)
                os.makedirs(workDir)
                if os.path.isdir(workDir):
                    os.chdir(workDir)
            except OSError as osError:
                if osError.errno == errno.EEXIST and os.path.isdir(workDir):
                    logMessage = 'JobWrapperTemplate found that the working directory already exists'
                    reason = 'Working Directory already exists'
                else:
                    logMessage = 'JobWrapperTemplate could not create working directory'
                    reason = 'Could Not Create Working Directory'
                gLogger.exception(logMessage)
                # No job report exists yet at this point
                rescheduleFailedJob(jobID, reason)
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    # --- initialization ----------------------------------------------------
    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)  # initialize doesn't return S_OK/S_ERROR
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception('JobWrapper failed the initialization phase',
                          lException=exc)
        rescheduled = rescheduleFailedJob(jobID, 'Job Wrapper Initialization',
                                          gJobReport)
        try:
            job.sendJobAccounting(rescheduled, 'Job Wrapper Initialization')
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception('JobWrapper failed sending job accounting',
                              lException=exc)
        return 1

    # --- input sandbox -----------------------------------------------------
    if 'InputSandbox' not in arguments['Job']:
        gLogger.verbose('Job has no InputSandbox requirement')
    else:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except JobWrapperError:
            gLogger.exception('JobWrapper failed to download input sandbox')
            _rescheduleAndAccount('Input Sandbox Download')
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception(
                'JobWrapper raised exception while downloading input sandbox',
                lException=exc)
            _rescheduleAndAccount('Input Sandbox Download')
            return 1

    gJobReport.commit()

    # --- input data --------------------------------------------------------
    if 'InputData' not in arguments['Job']:
        gLogger.verbose('Job has no InputData requirement')
    elif not arguments['Job']['InputData']:
        gLogger.verbose('Job has a null InputData requirement:')
        gLogger.verbose(arguments)
    else:
        try:
            result = job.resolveInputData()
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except JobWrapperError:
            gLogger.exception('JobWrapper failed to resolve input data')
            _rescheduleAndAccount('Input Data Resolution')
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception(
                'JobWrapper raised exception while resolving input data',
                lException=exc)
            _rescheduleAndAccount('Input Data Resolution')
            return 1

    gJobReport.commit()

    # --- payload execution -------------------------------------------------
    try:
        result = job.execute()
        if not result['OK']:
            gLogger.error('Failed to execute job', result['Message'])
            raise JobWrapperError((result['Message'], result['Errno']))
    except JobWrapperError as exc:
        if exc.value[1] == 0 or str(exc.value[0]) == '0':
            gLogger.verbose('JobWrapper exited with status=0 after execution')
        if exc.value[1] == DErrno.EWMSRESC:
            gLogger.warn("Asked to reschedule job")
            _rescheduleAndAccount('JobWrapper execution')
            return 1
        gLogger.exception('Job failed in execution phase')
        _failJob('Exception During Execution', exc)
        return 1
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception('Job raised exception during execution phase',
                          lException=exc)
        _failJob('Exception During Execution', exc)
        return 1

    # --- outputs -----------------------------------------------------------
    if 'OutputSandbox' not in arguments['Job'] and 'OutputData' not in arguments['Job']:
        gLogger.verbose('Job has no OutputData or OutputSandbox requirement')
    else:
        try:
            result = job.processJobOutputs()
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except JobWrapperError as exc:
            gLogger.exception('JobWrapper failed to process output files')
            _failJob('Uploading Job Outputs', exc)
            return 2
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception(
                'JobWrapper raised exception while processing output files',
                lException=exc)
            _failJob('Uploading Job Outputs', exc)
            return 2

    # --- finalization ------------------------------------------------------
    try:
        # Failed jobs will return 1 / successful jobs will return 0
        return job.finalize()
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception(
            'JobWrapper raised exception during the finalization phase',
            lException=exc)
        return 2
def main():
    """Uninstall a DIRAC component given as System/Component or System Component.

    Looks up the installation of the component on this host, optionally asks
    whether its logs should be removed (unless the force switch was given),
    uninstalls it and records the uninstallation in the monitoring.
    The script always terminates through DIRACexit.
    """
    global force

    from DIRAC.FrameworkSystem.Client.ComponentInstaller import gComponentInstaller

    def _exitOnFailure(res):
        """Log the error message and exit with code 1 if `res` is not OK."""
        if not res["OK"]:
            gLogger.error(res["Message"])
            DIRACexit(1)

    gComponentInstaller.exitOnError = True

    Script.registerSwitch("f", "force", "Forces the removal of the logs", setForce)
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument((
        "System/Component: Full component name (ie: WorkloadManagement/Matcher)",
        "System: Name of the DIRAC system (ie: WorkloadManagement)",
    ))
    Script.registerArgument(
        " Component: Name of the DIRAC service (ie: Matcher)", mandatory=False)
    _, args = Script.parseCommandLine()

    # A single argument is expected in the System/Component form
    if len(args) == 1:
        args = args[0].split("/")
    if len(args) < 2:
        Script.showHelp(exitCode=1)

    system, component = args[0], args[1]

    installationFilter = {"Instance": component, "UnInstallationTime": None}
    result = ComponentMonitoringClient().getInstallations(
        installationFilter,
        {"System": system},
        {"HostName": socket.getfqdn()},
        True)
    _exitOnFailure(result)

    # Exactly one matching installation is required
    if len(result["Value"]) < 1:
        gLogger.warn("Given component does not exist")
        DIRACexit(1)
    if len(result["Value"]) > 1:
        gLogger.error("Too many components match")
        DIRACexit(1)

    removeLogs = False
    if force:
        removeLogs = True
    elif result["Value"][0]["Component"]["Type"] in gComponentInstaller.componentTypes:
        result = promptUser("Remove logs?", ["y", "n"], "n")
        if not result["OK"]:
            gLogger.error(result["Message"])
            DIRACexit(1)
        else:
            removeLogs = result["Value"] == "y"

    result = gComponentInstaller.uninstallComponent(system, component, removeLogs)
    _exitOnFailure(result)

    result = MonitoringUtilities.monitorUninstallation(system, component)
    _exitOnFailure(result)

    gLogger.notice("Successfully uninstalled component %s/%s" % (system, component))
    DIRACexit()
def getJobStatus(self, jobIDList):
    """Return the DIRAC status of each of the given ARC jobs.

    As a side effect, the proxy of running/queuing jobs that is close to
    expiry is renewed, and jobs found in the "Hold" state are cancelled.

    :param jobIDList: a single job reference or an iterable of them; a DIRAC
        stamp appended after ":::" is ignored
    :returns: S_OK with a dict {ARC job ID: DIRAC status}, or S_ERROR when
        the proxy cannot be set up or no status is returned
    """
    proxyResult = self._prepareProxy()
    if not proxyResult['OK']:
        gLogger.error('ARCComputingElement: failed to set up proxy',
                      proxyResult['Message'])
        return proxyResult
    self.usercfg.ProxyPath(os.environ['X509_USER_PROXY'])

    requested = list(jobIDList)
    if isinstance(jobIDList, six.string_types):
        requested = [jobIDList]

    # Pilots are stored with a DIRAC stamp (":::XXXXX") appended
    arcRefs = [ref.split(":::")[0] if ":::" in ref else ref for ref in requested]
    arcJobs = [self.__getARCJob(ref) for ref in arcRefs]

    # JobSupervisor is able to aggregate jobs to perform bulk operations and
    # thus minimizes the communication overhead
    supervisor = arc.JobSupervisor(self.usercfg, arcJobs)
    supervisor.Update()

    resultDict = {}
    jobsToRenew = []
    jobsToCancel = []
    for job in supervisor.GetAllJobs():
        jobID = job.JobID
        gLogger.debug("Retrieving status for job %s" % jobID)
        arcState = job.State.GetGeneralState()
        gLogger.debug("ARC status for job %s is %s" % (jobID, arcState))

        if not arcState:
            resultDict[jobID] = 'Unknown'
        else:
            resultDict[jobID] = self.mapStates[arcState]
            # Renew proxy only of jobs which are running or queuing
            if arcState in ("Running", "Queuing"):
                # 2 hours, 46 minutes and 40 seconds
                nearExpiry = arc.Time() + arc.Period(10000)
                if job.ProxyExpirationTime < nearExpiry:
                    # Jobs to renew are aggregated to perform bulk operations
                    jobsToRenew.append(job)
                    gLogger.debug(
                        "Renewing proxy for job %s whose proxy expires at %s" %
                        (jobID, job.ProxyExpirationTime))
            if arcState == "Hold":
                # Jobs to cancel are aggregated to perform bulk operations
                # Cancel held jobs so they don't sit in the queue forever
                jobsToCancel.append(job)
                gLogger.debug("Killing held job %s" % jobID)

        # If done - is it really done? Check the exit code
        if resultDict[jobID] == "Done":
            if int(job.ExitCode):
                resultDict[jobID] = "Failed"
        gLogger.debug("DIRAC status for job %s is %s" % (jobID, resultDict[jobID]))

    # Bulk renewal and cancellation, again through JobSupervisor
    renewSupervisor = arc.JobSupervisor(self.usercfg, jobsToRenew)
    if not renewSupervisor.Renew():
        gLogger.warn('At least one of the jobs failed to renew its credentials')

    cancelSupervisor = arc.JobSupervisor(self.usercfg, jobsToCancel)
    if not cancelSupervisor.Cancel():
        gLogger.warn('At least one of the jobs failed to be cancelled')

    if resultDict:
        return S_OK(resultDict)
    return S_ERROR('No job statuses returned')
def downloadSandbox(self, sbLocation, destinationDir="", inMemory=False, unpack=True):
    """Download a sandbox bundle from its storage element.

    :param str sbLocation: sandbox URL of the form ``SB:<SEName>|<PFN>``
    :param str destinationDir: directory in which the bundle is unpacked;
        the current working directory is used when empty
    :param bool inMemory: if True, return the raw content of the bundle and
        leave nothing on disk
    :param bool unpack: if False, keep the bundle packed in its temporary
        directory and return its path
    :returns: S_OK / S_ERROR. On success Value is the content of the bundle
        (inMemory), the path of the downloaded bundle (unpack=False), or the
        summed size of the extracted members.
    """
    # Local import: only used for cleaning up the temporary directory
    import shutil

    if not sbLocation.startswith("SB:"):
        return S_ERROR("Invalid sandbox URL")
    sbSplit = sbLocation[3:].split("|")
    if len(sbSplit) < 2:
        return S_ERROR("Invalid sandbox URL")
    seName = sbSplit[0]
    sePFN = "|".join(sbSplit[1:])

    try:
        tmpSBDir = tempfile.mkdtemp(prefix="TMSB.")
    except (IOError, OSError) as e:
        # mkdtemp reports failures as OSError, which is not an IOError on Python 2
        return S_ERROR("Cannot create temporary file: %s" % repr(e))

    se = StorageElement(seName, vo=self.__vo)
    result = returnSingleResult(se.getFile(sePFN, localPath=tmpSBDir))
    if not result['OK']:
        # Do not leave the (possibly partially filled) temporary directory behind
        shutil.rmtree(tmpSBDir, ignore_errors=True)
        return result

    tarFileName = os.path.join(tmpSBDir, os.path.basename(sePFN))

    if inMemory:
        try:
            # The bundle is a binary archive: it must not be decoded as text
            with open(tarFileName, 'rb') as tfile:
                data = tfile.read()
        except IOError as e:
            return S_ERROR('Failed to read the sandbox archive: %s' % repr(e))
        finally:
            # Best-effort cleanup that cannot mask the error being reported
            shutil.rmtree(tmpSBDir, ignore_errors=True)
        return S_OK(data)

    # If destination dir is not specified use current working dir
    # If its defined ensure the dir structure is there
    if not destinationDir:
        destinationDir = os.getcwd()
    else:
        mkDir(destinationDir)

    if not unpack:
        # The caller takes over the bundle, so the temporary directory is kept
        return S_OK(tarFileName)

    try:
        sandboxSize = 0
        with tarfile.open(name=tarFileName, mode="r") as tf:
            for tarinfo in tf:
                # NOTE(review): member names are not sanitised, so a crafted
                # bundle could write outside destinationDir - confirm whether
                # bundles can come from untrusted users.
                tf.extract(tarinfo, path=destinationDir)
                sandboxSize += tarinfo.size
        # FIXME: here we return the size, but otherwise we always return the location: inconsistent
        # FIXME: looks like this size is used by the JobWrapper
        result = S_OK(sandboxSize)
    except (IOError, OSError, tarfile.TarError) as e:
        # A corrupted or unreadable archive raises tarfile.TarError, not IOError
        result = S_ERROR("Could not open bundle: %s" % repr(e))

    try:
        os.unlink(tarFileName)
        os.rmdir(tmpSBDir)
    except OSError as e:
        gLogger.warn("Could not remove temporary dir %s: %s" % (tmpSBDir, repr(e)))

    return result
def resolveTarget(self):
    """Find the target SURLs of the files eligible for submission.

    For every file whose status is not in ``self.noSubmitStatus`` the target
    URL is resolved and stored. Files whose target already exists are either
    marked Failed (source unknown, or source and target are the same) or get
    their existing target removed so that it can be transferred again.

    :param self: self reference
    :returns: S_OK, or S_ERROR when the replica cache cannot be updated, no
        file has a target, or the target existence check fails
    """
    toResolve = [
        lfn for lfn in self.fileDict
        if self.fileDict[lfn].get('Status') not in self.noSubmitStatus
    ]
    if not toResolve:
        return S_OK()

    res = self.__updateReplicaCache(toResolve)
    if not res['OK']:
        return res

    for lfn in toResolve:
        res = returnSingleResult(self.oTargetSE.getURL(lfn, protocol='srm'))
        if res['OK']:
            res = self.setTargetSURL(lfn, res['Value'])
        if not res['OK']:
            # Same handling whether the URL lookup or its registration failed
            gLogger.warn("resolveTarget: skipping %s - %s" % (lfn, res['Message']))
            self.__setFileParameter(lfn, 'Reason', res['Message'])
            self.__setFileParameter(lfn, 'Status', 'Failed')

    # Only the files for which a target could be set are checked further
    withTarget = [lfn for lfn in self.fileDict if "Target" in self.fileDict[lfn]]
    if not withTarget:
        return S_ERROR("No eligible Target files")

    existence = self.oTargetSE.exists(withTarget)
    if not existence['OK']:
        gLogger.error("resolveTarget: failed to check target existence",
                      existence['Message'])
        return S_ERROR("Failed to check target existence")

    for lfn, error in existence['Value']['Failed'].items():
        self.__setFileParameter(lfn, 'Reason', error)
        self.__setFileParameter(lfn, 'Status', 'Failed')

    toRemove = []
    for lfn, exists in existence['Value']['Successful'].items():
        if not exists:
            continue
        source = self.getSourceSURL(lfn)
        if not source['OK']:
            gLogger.warn("resolveTarget: skipping %s - target exists" % lfn)
            self.__setFileParameter(lfn, 'Reason', "Target exists")
            self.__setFileParameter(lfn, 'Status', 'Failed')
        elif source['Value'] == self.fileDict[lfn]['Target']:
            gLogger.warn(
                "resolveTarget: skipping %s - source and target pfns are the same"
                % lfn)
            self.__setFileParameter(lfn, 'Reason', "Source and Target the same")
            self.__setFileParameter(lfn, 'Status', 'Failed')
        else:
            toRemove.append(lfn)

    if toRemove:
        removal = self.oTargetSE.removeFile(toRemove)
        if not removal['OK']:
            # Best effort: the files stay eligible, but the failure is no longer silent
            gLogger.warn("resolveTarget: failed to remove existing target files",
                         removal['Message'])

    return S_OK()
def generateProxy(params):
    """Generate a proxy from the user certificate and key.

    Steps: optional host clock check, location of the certificate and key,
    password acquisition, optional validation of the DN and group against the
    CS (which needs a temporary proxy to contact it), and finally the
    generation of the proxy file.

    :param params: object carrying the options read here: checkClock,
        certLoc, keyLoc, userPasswd, proxyLoc, checkWithCS, proxyLifeTime,
        proxyStrength, limitedProxy, rfc, diracGroup and summary.
        certLoc, keyLoc, userPasswd and diracGroup may be updated in place.
    :returns: S_OK(proxy location) or S_ERROR
    """
    if params.checkClock:
        result = getClockDeviation()
        if result['OK']:
            deviation = result['Value']
            if deviation > 600:
                gLogger.error(
                    "Your host clock seems to be off by more than TEN MINUTES! Thats really bad."
                )
                gLogger.error(
                    "We're cowardly refusing to generate a proxy. Please fix your system time"
                )
                sys.exit(1)
            elif deviation > 180:
                gLogger.error(
                    "Your host clock seems to be off by more than THREE minutes! Thats bad."
                )
                gLogger.notice(
                    "We'll generate the proxy but please fix your system time")
            elif deviation > 60:
                gLogger.error(
                    "Your host clock seems to be off by more than a minute! Thats not good."
                )
                gLogger.notice(
                    "We'll generate the proxy but please fix your system time")

    certLoc = params.certLoc
    keyLoc = params.keyLoc
    if not certLoc or not keyLoc:
        cakLoc = Locations.getCertificateAndKeyLocation()
        if not cakLoc:
            return S_ERROR("Can't find user certificate and key")
        if not certLoc:
            certLoc = cakLoc[0]
        if not keyLoc:
            keyLoc = cakLoc[1]
    params.certLoc = certLoc
    params.keyLoc = keyLoc

    # Load password
    testChain = X509Chain()
    retVal = testChain.loadChainFromFile(params.certLoc)
    if not retVal['OK']:
        return S_ERROR("Cannot load certificate %s: %s" %
                       (params.certLoc, retVal['Message']))
    retVal = testChain.getRemainingSecs()
    if not retVal['OK']:
        return S_ERROR("Cannot load certificate %s: %s" %
                       (params.certLoc, retVal['Message']))
    timeLeft = retVal['Value'] / 86400
    if timeLeft < 30:
        gLogger.notice(
            "\nYour certificate will expire in %d days. Please renew it!\n" %
            timeLeft)

    # The key could not be loaded with the password at hand: ask for one
    retVal = testChain.loadKeyFromFile(params.keyLoc, password=params.userPasswd)
    if not retVal['OK']:
        passwdPrompt = "Enter Certificate password:"
        # NOTE(review): this condition and the stdin read were unreadable
        # (redacted) in the reviewed text and are reconstructed - confirm the
        # option name `stdinPasswd` against the params class.
        if params.stdinPasswd:
            userPasswd = sys.stdin.readline().strip("\n")
        else:
            userPasswd = getpass.getpass(passwdPrompt)
        params.userPasswd = userPasswd

    # Find location
    proxyLoc = params.proxyLoc
    if not proxyLoc:
        proxyLoc = Locations.getDefaultProxyLocation()

    chain = X509Chain()
    # Load user cert and key
    retVal = chain.loadChainFromFile(certLoc)
    if not retVal['OK']:
        gLogger.warn(retVal['Message'])
        return S_ERROR("Can't load %s" % certLoc)
    retVal = chain.loadKeyFromFile(keyLoc, password=params.userPasswd)
    if not retVal['OK']:
        gLogger.warn(retVal['Message'])
        if 'bad decrypt' in retVal['Message']:
            return S_ERROR("Bad passphrase")
        return S_ERROR("Can't load %s" % keyLoc)

    if params.checkWithCS:
        # A first proxy, without group, is needed to be able to contact the CS
        retVal = chain.generateProxyToFile(proxyLoc,
                                           params.proxyLifeTime,
                                           strength=params.proxyStrength,
                                           limited=params.limitedProxy,
                                           rfc=params.rfc)
        if not retVal['OK']:
            gLogger.warn(retVal['Message'])
            return S_ERROR("Couldn't generate proxy: %s" % retVal['Message'])

        gLogger.info("Contacting CS...")
        retVal = Script.enableCS()
        if not retVal['OK']:
            gLogger.warn(retVal['Message'])
            if 'Unauthorized query' in retVal['Message']:
                # add hint for users
                return S_ERROR(
                    "Can't contact DIRAC CS: %s (User possibly not registered with dirac server) "
                    % retVal['Message'])
            return S_ERROR("Can't contact DIRAC CS: %s" % retVal['Message'])

        retVal = chain.getCertInChain(-1)
        if retVal['OK']:
            retVal = retVal['Value'].getSubjectDN()
        if not retVal['OK']:
            gLogger.warn(retVal['Message'])
            return S_ERROR("Can't load %s" % certLoc)
        userDN = retVal['Value']

        if not params.diracGroup:
            result = Registry.findDefaultGroupForDN(userDN)
            if not result['OK']:
                gLogger.warn("Could not get a default group for DN %s: %s" %
                             (userDN, result['Message']))
            else:
                params.diracGroup = result['Value']
                gLogger.info("Default discovered group is %s" % params.diracGroup)

        gLogger.info("Checking DN %s" % userDN)
        retVal = Registry.getUsernameForDN(userDN)
        if not retVal['OK']:
            gLogger.warn(retVal['Message'])
            return S_ERROR("DN %s is not registered" % userDN)
        username = retVal['Value']
        gLogger.info("Username is %s" % username)

        retVal = Registry.getGroupsForUser(username)
        if not retVal['OK']:
            gLogger.warn(retVal['Message'])
            return S_ERROR("User %s has no groups defined" % username)
        groups = retVal['Value']
        if params.diracGroup not in groups:
            return S_ERROR("Requested group %s is not valid for DN %s" %
                           (params.diracGroup, userDN))
        gLogger.info("Creating proxy for %s@%s (%s)" %
                     (username, params.diracGroup, userDN))

    if params.summary:
        h = int(params.proxyLifeTime / 3600)
        m = int(params.proxyLifeTime / 60) - h * 60
        gLogger.notice("Proxy lifetime will be %02d:%02d" % (h, m))
        gLogger.notice("User cert is %s" % certLoc)
        gLogger.notice("User key is %s" % keyLoc)
        gLogger.notice("Proxy will be written to %s" % proxyLoc)
        if params.diracGroup:
            gLogger.notice("DIRAC Group will be set to %s" % params.diracGroup)
        else:
            gLogger.notice("No DIRAC Group will be set")
        gLogger.notice("Proxy strength will be %s" % params.proxyStrength)
        if params.limitedProxy:
            gLogger.notice("Proxy will be limited")

    retVal = chain.generateProxyToFile(proxyLoc,
                                       params.proxyLifeTime,
                                       params.diracGroup,
                                       strength=params.proxyStrength,
                                       limited=params.limitedProxy,
                                       rfc=params.rfc)
    if not retVal['OK']:
        gLogger.warn(retVal['Message'])
        return S_ERROR("Couldn't generate proxy: %s" % retVal['Message'])
    return S_OK(proxyLoc)
if self.params.outRelNotes: gLogger.notice( "Leaving a copy of the release notes outside the tarballs" ) baseList.append( "%s/releasenotes.%s.%s" % ( self.params.destination, self.params.name, self.params.version ) ) for baseFileName in baseList: htmlFileName = baseFileName + ".html" try: fd = open( htmlFileName, "w" ) fd.write( parts[ 'whole' ] ) fd.close() except Exception, excp: return S_ERROR( "Could not write %s: %s" % ( htmlFileName, excp ) ) #To pdf pdfCmd = "rst2pdf '%s' -o '%s.pdf'" % ( relNotesRST, baseFileName ) gLogger.verbose( "Executing %s" % pdfCmd ) if os.system( pdfCmd ): gLogger.warn( "Could not generate PDF version of %s" % baseNotesPath ) #Unlink if not necessary if not cliParams.relNotes: try: os.unlink( relNotesRST ) except: pass return S_OK() def __generateTarball( self ): destDir = self.params.destination tarName = "%s-%s.tar.gz" % ( self.params.name, self.params.version ) tarfilePath = os.path.join( destDir, tarName ) dirToTar = os.path.join( self.params.destination, self.params.name ) result = Distribution.writeVersionToInit( dirToTar, self.params.version ) if not result[ 'OK' ]:
gLogger.info(infoStr) except Exception, x: errStr = "SRM2Storage.__init__: Failed to import lcg_util: %s" % ( x) gLogger.exception(errStr) try: import gfalthr as gfal infoStr = "Using gfalthr from: \n%s" % gfal.__file__ gLogger.info(infoStr) infoStr = "The version of gfalthr is %s" % gfal.gfal_version() gLogger.info(infoStr) except Exception, x: errStr = "SRM2Storage.__init__: Failed to import gfalthr: %s." % ( x) gLogger.warn(errStr) try: import gfal infoStr = "Using gfal from: %s" % gfal.__file__ gLogger.info(infoStr) infoStr = "The version of gfal is %s" % gfal.gfal_version() gLogger.info(infoStr) except Exception, x: errStr = "SRM2Storage.__init__: Failed to import gfal: %s" % ( x) gLogger.exception(errStr) defaultProtocols = gConfig.getValue( '/Resources/StorageElements/DefaultProtocols', []) gLogger.info('Default list of protocols are: %s' % (', '.join(defaultProtocols)))
def doCommand(self):
    """Return the worst monitored status of a resource.

    Uses :meth:`DIRAC.ResourceStatusSystem.Client.ResourceStatusClient.getMonitoredStatus`

    :params:
      :attr:`args`: a tuple
        - `args[0]`: string - should be a ValidRes
        - `args[1]`: string - should be the name of the ValidRes
        - `args[2]`: optional string - a ValidRes (get status of THIS ValidRes
          for name in args[1], will call getGeneralName)

    :returns: {'Result': <status>} where <status> is the first of the
        returned statuses once sorted with `value_of_status`, or
        {'Result': 'Unknown'} when the status cannot be obtained
    """
    super(MonitoredStatus_Command, self).doCommand()

    if self.client is None:
        from DIRAC.ResourceStatusSystem.Client.ResourceStatusClient import ResourceStatusClient
        self.client = ResourceStatusClient(timeout=self.timeout)

    try:
        if len(self.args) == 3:
            # args[2] must come before args[0] in ValidRes
            if ValidRes.index(self.args[2]) >= ValidRes.index(self.args[0]):
                raise InvalidRes(where(self, self.doCommand))
            generalName = self.client.getGeneralName(self.args[0], self.args[1],
                                                     self.args[2])
            if not generalName['OK']:
                return {'Result': 'Unknown'}
            granularity, toBeFound = self.args[2], generalName['Value']
        else:
            granularity, toBeFound = self.args[0], self.args[1]

        statuses = self.client.getMonitoredStatus(granularity, toBeFound)
        if not statuses['OK']:
            return {'Result': 'Unknown'}
        statuses = statuses['Value']

        if not statuses:
            gLogger.warn("No status found for %s" % toBeFound)
            return {'Result': 'Unknown'}
    except Exception:
        # Any failure (including the InvalidRes raised above) maps to 'Unknown';
        # SystemExit and KeyboardInterrupt are deliberately left to propagate.
        gLogger.exception("Exception when calling ResourceStatusClient for %s %s" %
                          (self.args[0], self.args[1]))
        return {'Result': 'Unknown'}

    # statuses is a list of statuses. We take the worst returned
    # status.
    assert isinstance(statuses, list)
    statuses.sort(key=value_of_status)

    res = statuses[0]

    if len(statuses) > 1:
        gLogger.info(ValidStatus)
        gLogger.info(statuses)

    return {'Result': res}
def resolveSource(self):
    """Resolve the source SURLs of the files eligible for submission.

    Files without a replica at the source SE, without a resolvable URL, or
    whose source metadata is unusable (missing, unavailable, lost, size or
    checksum mismatch) are marked Failed with a Reason. Files that are not
    cached at the source are sent to prestaging and marked Staging; files
    that were Staging and are now cached go back to Waiting.

    :param self: self reference
    :returns: S_OK, or S_ERROR when a cache update fails, no file has a
        source, or the source metadata cannot be checked
    """
    # Avoid resolving sources twice
    if self.sourceResolved:
        return S_OK()

    def markFailed(lfn, reason):
        """Flag `lfn` as Failed, recording `reason`."""
        self.__setFileParameter(lfn, 'Reason', reason)
        self.__setFileParameter(lfn, 'Status', 'Failed')

    # Only resolve files that need a transfer
    candidates = [
        lfn for lfn in self.fileDict
        if self.fileDict[lfn].get("Status", "") != "Failed"
    ]
    if not candidates:
        return S_OK()

    res = self.__updateMetadataCache(candidates)
    if not res['OK']:
        return res
    res = self.__updateReplicaCache(candidates)
    if not res['OK']:
        return res

    # Define the source URLs
    for lfn in candidates:
        if self.sourceSE not in self.catalogReplicas.get(lfn, {}):
            gLogger.warn(
                "resolveSource: skipping %s - not replicas at SourceSE %s" %
                (lfn, self.sourceSE))
            markFailed(lfn, "No replica at SourceSE")
            continue

        res = returnSingleResult(self.oSourceSE.getURL(lfn, protocol='srm'))
        if res['OK']:
            res = self.setSourceSURL(lfn, res['Value'])
        if not res['OK']:
            # Either the URL lookup or its registration failed
            gLogger.warn("resolveSource: skipping %s - %s" % (lfn, res["Message"]))
            markFailed(lfn, res['Message'])

    withSource = [lfn for lfn in self.fileDict if "Source" in self.fileDict[lfn]]
    if not withSource:
        return S_ERROR("No eligible Source files")

    # Get metadata of the sources, to check for existance, availability and caching
    res = self.oSourceSE.getFileMetadata(withSource)
    if not res['OK']:
        return S_ERROR("Failed to check source file metadata")

    for lfn, error in res['Value']['Failed'].items():
        if re.search('File does not exist', error):
            logDetail, reason = "source file does not exists", "Source file does not exist"
        else:
            logDetail, reason = "failed to get source metadata", "Failed to get Source metadata"
        gLogger.warn("resolveSource: skipping %s - %s" % (lfn, logDetail))
        markFailed(lfn, reason)

    toStage = []
    nbStagedFiles = 0
    for lfn, metadata in res['Value']['Successful'].items():
        lfnStatus = self.fileDict.get(lfn, {}).get('Status')

        if metadata.get('Unavailable', False):
            gLogger.warn("resolveSource: skipping %s - source file unavailable" % lfn)
            markFailed(lfn, "Source file Unavailable")
        elif metadata.get('Lost', False):
            gLogger.warn("resolveSource: skipping %s - source file lost" % lfn)
            markFailed(lfn, "Source file Lost")
        elif not metadata.get('Cached', metadata['Accessible']):
            # Not on disk: request staging unless it is already ongoing
            if lfnStatus != 'Staging':
                toStage.append(lfn)
        elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
            gLogger.warn("resolveSource: skipping %s - source file size mismatch" % lfn)
            markFailed(lfn, "Source size mismatch")
        elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
                not compareAdler(metadata['Checksum'], self.catalogMetadata[lfn]['Checksum']):
            gLogger.warn("resolveSource: skipping %s - source file checksum mismatch" % lfn)
            markFailed(lfn, "Source checksum mismatch")
        elif lfnStatus == 'Staging':
            # file that was staging is now cached
            self.__setFileParameter(lfn, 'Status', 'Waiting')
            nbStagedFiles += 1

    # Some files were being staged
    if nbStagedFiles:
        self.log.info('resolveSource: %d files have been staged' % nbStagedFiles)

    # Launching staging of files not in cache
    if toStage:
        gLogger.warn("resolveSource: %s source files not cached, prestaging..." %
                     len(toStage))
        stage = self.oSourceSE.prestageFile(toStage)
        if stage["OK"]:
            staged = stage['Value']
            for lfn in toStage:
                if lfn in staged['Successful']:
                    self.__setFileParameter(lfn, 'Status', 'Staging')
                elif lfn in staged['Failed']:
                    markFailed(lfn, staged['Failed'][lfn])
        else:
            gLogger.error("resolveSource: error is prestaging", stage["Message"])
            for lfn in toStage:
                markFailed(lfn, stage["Message"])

    self.sourceResolved = True
    return S_OK()
def _getPilotOptionsPerSetup(self, setup, pilotDict):
    """Fill pilotDict with the pilot options of the given setup.

    Everything found in ``/Operations/<setup>/Pilot`` is stored under
    ``pilotDict['Setups'][setup]``. 'Commands' and 'CommandExtensions' are
    broken down into lists, and, when a 'LoggingMQService' is defined, its
    host, port and queues are added under 'Logging'.

    :param str setup: name of the setup
    :param dict pilotDict: dictionary updated in place; must contain 'Setups'
    :returns: None, both on success and when the Pilot section does not exist;
        the failed CS result (S_ERROR structure) when the MQ service
        description cannot be read
    """
    options = gConfig.getOptionsDict('/Operations/%s/Pilot' % setup)
    if not options['OK']:
        gLogger.warn(
            "Section /Operations/%s/Pilot does not exist: skipping" % setup)
        return

    # We include everything that's in the Pilot section for this setup
    if setup == self.pilotSetup:
        self.pilotVOVersion = options['Value']['Version']
    setupOptions = options['Value']
    pilotDict['Setups'][setup] = setupOptions

    # We update separately 'GenericPilotDNs'
    try:
        pilotDict['GenericPilotDNs'].append(setupOptions['GenericPilotDN'])
    except KeyError:
        # Either key may legitimately be absent
        pass

    ceTypesCommands = gConfig.getOptionsDict(
        '/Operations/%s/Pilot/Commands' % setup)
    if ceTypesCommands['OK']:
        # It's ok if the Pilot section doesn't list any Commands too
        # FIXME: inconsistent that we break Commands down into a proper list
        # but other things are comma-list strings
        commands = {}
        setupOptions['Commands'] = commands
        for ceType in ceTypesCommands['Value']:
            commands[ceType] = ceTypesCommands['Value'][ceType].split(', ')

    if 'CommandExtensions' in setupOptions:
        # FIXME: inconsistent that we break CommandExtensionss down into a proper
        # list but other things are comma-list strings
        setupOptions['CommandExtensions'] = setupOptions['CommandExtensions'].split(', ')

    # Getting the details aboout the MQ Services to be used for logging, if any
    if 'LoggingMQService' in setupOptions:
        mqServicePath = '/Resources/MQServices/%s' % setupOptions['LoggingMQService']

        loggingMQService = gConfig.getOptionsDict(mqServicePath)
        if not loggingMQService['OK']:
            gLogger.error(loggingMQService['Message'])
            return loggingMQService
        logging = {
            'Host': loggingMQService['Value']['Host'],
            'Port': loggingMQService['Value']['Port'],
        }
        setupOptions['Logging'] = logging

        queuesSections = gConfig.getSections('%s/Queues' % mqServicePath)
        if not queuesSections['OK']:
            gLogger.error(queuesSections['Message'])
            return queuesSections

        # The queue options are read from the CS once and published under both
        # the 'Queue' and 'Queues' keys, as independent dictionaries.
        logging['Queue'] = {}
        for queue in queuesSections['Value']:
            queueOptions = gConfig.getOptionsDict(
                '%s/Queues/%s' % (mqServicePath, queue))
            if not queueOptions['OK']:
                gLogger.error(queueOptions['Message'])
                return queueOptions
            logging['Queue'][queue] = queueOptions['Value']

        queuesDict = {}
        for queue, queueValues in logging['Queue'].items():
            queuesDict[queue] = dict(queueValues)
        logging['Queues'] = queuesDict
def execute(self):
    """Main execution method: refresh the counters of the selected transformations.

    For every transformation whose status is in ``self.transfStatuses`` the
    per-state task and file statistics are fetched from the transformation
    client and pushed back with ``updateTransformationCounters``. States that
    are absent from the statistics are reported as 0.

    :return: S_OK() in all cases; failures are only logged
    """
    gMonitor.addMark('Iteration', 1)

    # Get all the transformations
    result = self.transClient.getTransformations(condDict={'Status': self.transfStatuses},
                                                 timeout=320)
    if not result['OK']:
        gLogger.error("UpdateTransformationCounters.execute: Failed to get transformations.",
                      result['Message'])
        return S_OK()

    # Get the list of states to report; a failed call carries no 'Value',
    # so it has to be checked before use
    res = self.transClient.getTransformationCountersStatuses('Tasks')
    if not res['OK']:
        gLogger.error("UpdateTransformationCounters.execute: Failed to get the tasks statuses.",
                      res['Message'])
        return S_OK()
    jobsStates = res['Value']

    res = self.transClient.getTransformationCountersStatuses('Files')
    if not res['OK']:
        gLogger.error("UpdateTransformationCounters.execute: Failed to get the files statuses.",
                      res['Message'])
        return S_OK()
    filesStates = res['Value']

    # Process each transformation
    # NOTE(review): every failure below stops the whole loop (break), so the
    # remaining transformations are not updated in this cycle - confirm that
    # this is intended rather than skipping only the faulty transformation.
    for transDict in result['Value']:
        transID = long(transDict['TransformationID'])
        gLogger.debug("Looking at transformationID %d" % transID)
        counterDict = {'TransformationID': transID}

        # Take care of the Tasks' states
        gLogger.verbose("Getting the tasks stats for Transformation %s" % transID)
        res = self.transClient.getTransformationTaskStats(transID)
        if not res['OK']:
            gLogger.warn("Could not get Transformation Task Stats for transformation %s : %s" %
                         (transID, res['Message']))
            break
        taskDict = res['Value']
        if not taskDict:
            gLogger.warn("No Task Statuses found")
            break
        gLogger.verbose("Got %s tasks dict for transformation %s" % (str(taskDict), transID))
        for state in jobsStates:
            counterDict[state] = taskDict.get(state, 0)

        # Now look for the files' states
        gLogger.verbose("Getting the files stats for Transformation %s" % transID)
        res = self.transClient.getTransformationStats(transID)
        if not res['OK']:
            gLogger.warn("Could not get Transformation Stats for transformation %s : %s" %
                         (transID, res['Message']))
            break
        fileDict = res['Value']
        if not fileDict:
            gLogger.warn("No File Statuses found")
            break
        gLogger.debug("Got %s file dict for transformation %s" % (str(fileDict), transID))
        for state in filesStates:
            counterDict[state] = fileDict.get(state, 0)

        gLogger.verbose("Updating the counters for transformation %s" % transID)
        res = self.transClient.updateTransformationCounters(counterDict)
        if not res['OK']:
            gLogger.error("Failed updating counters for transformation %s: %s" %
                          (transID, res['Message']))
        else:
            gLogger.verbose("Updated the counters of transformation %s" % transID)

    return S_OK()
def getStorages(self, storageName, pluginList=None, hideExceptions=False):
    """Instantiate the storage plug-ins configured in the CS for a DIRAC SE.

    The per-call state kept on the instance (name, options, protocols, storages
    and the local/remote plug-in lists) is reset at every call.

    :param storageName: the DIRAC SE name, e.g. 'CERN-RAW'
    :param pluginList: optional plug-in name, or list of names, when only a
                       sub-set of the protocols is desired, e.g. ['SRM2', 'SRM1']
    :param hideExceptions: passed on to the storage object generation
    :return: S_OK with a dictionary holding the storage name, options, storage
             objects, requested local/remote plug-ins, protocol options and
             TURL protocols; the failed result of a CS lookup; or S_ERROR when
             no storage object could be instantiated
    """
    # Reset what is remembered from a previous call
    self.name = ""
    self.options = {}
    self.protocols = {}
    self.storages = []
    self.localPlugins = []
    self.remotePlugins = []

    # Accept a single plug-in name as well as a list of names
    if isinstance(pluginList, six.string_types):
        pluginList = [pluginList]
    elif pluginList is None:
        pluginList = []

    if not self.vo:
        gLogger.warn("No VO information available")

    # Resolve a possible alias to the real storage name
    aliasRes = self._getConfigStorageName(storageName, "Alias")
    if not aliasRes["OK"]:
        return aliasRes
    storageName = aliasRes["Value"]
    self.name = storageName

    # A storage can be derived from a base SE: in that case the configuration
    # is read from the base SE section and the derived name is kept aside
    baseRes = self._getConfigStorageName(storageName, "BaseSE")
    if not baseRes["OK"]:
        return baseRes
    baseName = baseRes["Value"]
    if baseName == storageName:
        derivedStorageName = None
        seConfigPath = SE_CONFIG_PATH
    else:
        derivedStorageName = storageName
        storageName = baseName
        seConfigPath = SE_BASE_CONFIG_PATH

    # Options defined in the CS for this storage
    optionsRes = self._getConfigStorageOptions(storageName,
                                               derivedStorageName=derivedStorageName,
                                               seConfigPath=seConfigPath)
    if not optionsRes["OK"]:
        return optionsRes
    self.options = optionsRes["Value"]

    # Protocol specific details
    protocolsRes = self._getConfigStorageProtocols(storageName,
                                                   derivedStorageName=derivedStorageName,
                                                   seConfigPath=seConfigPath)
    if not protocolsRes["OK"]:
        return protocolsRes
    self.protocols = protocolsRes["Value"]

    localRequested = []
    remoteRequested = []
    detailsRequested = []
    turlProtocols = []

    # Generate the protocol specific plug-ins
    for section, details in self.protocols.items():
        pluginName = details.get("PluginName", section)
        if pluginList and pluginName not in pluginList:
            continue
        protocol = details["Protocol"]
        generated = self.__generateStorageObject(storageName, pluginName, details,
                                                 hideExceptions=hideExceptions)
        if not generated["OK"]:
            gLogger.info(generated["Message"])
            continue
        self.storages.append(generated["Value"])
        if pluginName in self.localPlugins:
            turlProtocols.append(protocol)
            localRequested.append(pluginName)
        if pluginName in self.remotePlugins:
            remoteRequested.append(pluginName)
        detailsRequested.append(details)

    if not self.storages:
        errStr = "StorageFactory.getStorages: Failed to instantiate any storage protocols."
        gLogger.error(errStr, self.name)
        return S_ERROR(errStr)

    return S_OK({"StorageName": self.name,
                 "StorageOptions": self.options,
                 "StorageObjects": self.storages,
                 "LocalPlugins": localRequested,
                 "RemotePlugins": remoteRequested,
                 "ProtocolOptions": detailsRequested,
                 "TurlProtocols": turlProtocols})
def _syncScripts(self):
    """Clone the pilot scripts from the repositories and upload them to the web server.

    The VO extension repository (``self.pilotVORepo``, when defined) and the
    pilot repository (``self.pilotRepo``) are cloned into the local directories
    'pilotVOLocalRepo' and 'pilotLocalRepo' of the current working directory,
    which are wiped first if they already exist. Every ``*.py`` script found is
    uploaded with ``self._upload``, then all of them are packed into
    'pilot.tar', which is uploaded as well.

    A failure to upload a single script is only logged; a failure to upload
    'pilot.tar' is returned to the caller.

    :return: S_OK() on success, the failed upload result for 'pilot.tar', or
             S_ERROR if a ValueError is raised while uploading/packing
    """
    gLogger.info('-- Uploading the pilot scripts --')

    # Paths of the scripts that will be packed into pilot.tar
    tarFiles = []

    # Extension, if it exists
    if self.pilotVORepo:
        if os.path.isdir('pilotVOLocalRepo'):
            shutil.rmtree('pilotVOLocalRepo')
        os.mkdir('pilotVOLocalRepo')
        repo_VO = Repo.init('pilotVOLocalRepo')
        upstream = repo_VO.create_remote('upstream', self.pilotVORepo)
        upstream.fetch()
        upstream.pull(upstream.refs[0].remote_head)
        if repo_VO.tags:
            repo_VO.git.checkout(repo_VO.tags[self.pilotVOVersion], b='pilotVOScripts')
        else:
            repo_VO.git.checkout('upstream/master', b='pilotVOScripts')
        scriptDir = os.path.join('pilotVOLocalRepo', self.projectDir,
                                 self.pilotVOScriptPath, "*.py")
        for fileVO in glob.glob(scriptDir):
            result = self._upload(filename=os.path.basename(fileVO), pilotScript=fileVO)
            if not result['OK']:
                gLogger.error("Error uploading the VO pilot script: %s" % result['Message'])
            tarFiles.append(fileVO)
    else:
        gLogger.warn("The /Operations/<Setup>/Pilot/pilotVORepo option is not defined")

    # DIRAC repo
    if os.path.isdir('pilotLocalRepo'):
        shutil.rmtree('pilotLocalRepo')
    os.mkdir('pilotLocalRepo')
    repo = Repo.init('pilotLocalRepo')
    upstream = repo.create_remote('upstream', self.pilotRepo)
    upstream.fetch()
    upstream.pull(upstream.refs[0].remote_head)
    if repo.tags:
        # releases.cfg is read from the VO extension when there is one
        if self.pilotVORepo:
            localRepo = 'pilotVOLocalRepo'
        else:
            localRepo = 'pilotLocalRepo'
        with open(os.path.join(localRepo, self.projectDir, 'releases.cfg'), 'r') as releasesFile:
            lines = [line.rstrip('\n').strip() for line in releasesFile]
        if self.pilotVOVersion in lines:
            # The pilot version is read from the 3rd line after the VO version
            # line, as the text following the first ':'
            self.pilotVersion = lines[lines.index(self.pilotVOVersion) + 3].split(':')[1]
        repo.git.checkout(repo.tags[self.pilotVersion], b='pilotScripts')
    else:
        repo.git.checkout('upstream/master', b='pilotScripts')

    try:
        scriptDir = os.path.join('pilotLocalRepo', self.pilotScriptPath, "*.py")
        for filename in glob.glob(scriptDir):
            result = self._upload(filename=os.path.basename(filename), pilotScript=filename)
            if not result['OK']:
                gLogger.error("Error uploading the pilot script: %s" % result['Message'])
            tarFiles.append(filename)

        # dirac-install.py is taken from Core/scripts when the pilot scripts
        # directory does not provide it
        if not os.path.isfile(os.path.join('pilotLocalRepo', self.pilotScriptPath,
                                           "dirac-install.py")):
            installScript = os.path.join('pilotLocalRepo', "Core/scripts/dirac-install.py")
            result = self._upload(filename='dirac-install.py', pilotScript=installScript)
            if not result['OK']:
                gLogger.error("Error uploading dirac-install.py: %s" % result['Message'])
            # Keep the path inside the clone: the packing loop below copies each
            # entry into the working directory, which cannot work with a bare
            # file name (source and destination would be the same path)
            tarFiles.append(installScript)

        with tarfile.TarFile(name='pilot.tar', mode='w') as tf:
            pwd = os.getcwd()
            for ptf in tarFiles:
                # Copy next to the tar file so that members are stored flat
                shutil.copyfile(ptf, os.path.join(pwd, os.path.basename(ptf)))
                tf.add(os.path.basename(ptf), recursive=False)

        result = self._upload(filename='pilot.tar', pilotScript='pilot.tar')
        if not result['OK']:
            gLogger.error("Error uploading pilot.tar: %s" % result['Message'])
            return result

    except ValueError as error:
        # Report the exception itself: the last upload result may not exist
        # or may be a successful one, which carries no 'Message'
        errMsg = "Error uploading the pilot scripts: %s" % error
        gLogger.error(errMsg)
        return S_ERROR(errMsg)

    return S_OK()
def _insertFiles(self, lfns, uid, gid, connection=False):
    """Insert new files in the FC_Files and FC_FileInfo tables.

    ``lfns`` is modified in place: the 'FileID' of every inserted file is added
    to its dictionary, and the LFNs that failed are removed from it.

    :param lfns: dictionary {lfn: fileInfo}; each fileInfo must provide 'DirID',
                 'Size' and 'Checksum', and may provide 'Owner', 'ChecksumType'
                 (default 'Adler32'), 'GUID' (default '') and 'Mode'
                 (default ``self.db.umask``)
    :param uid: user ID used when no (resolvable) 'Owner' is given for a file
    :param gid: group ID used when no (resolvable) 'Owner' is given for a file
    :param connection: database connection, resolved through ``self._getConnection``
    :return: S_OK({'Successful': lfns, 'Failed': {lfn: reason}}), or the failed
             result of the FC_Files insertion
    """
    connection = self._getConnection(connection)
    failed = {}
    if not lfns:
        # Nothing to insert: an INSERT with an empty VALUES list is invalid SQL
        return S_OK({'Successful': lfns, 'Failed': failed})

    res = self.db.getStatusInt('AprioriGood', connection=connection)
    statusID = res['Value'] if res['OK'] else 0

    # Add the files
    # NOTE(review): file name, GUID and checksum are interpolated unescaped in
    # the SQL statements below - confirm that callers sanitize them.
    insertTuples = []
    directorySESizeDict = {}
    for lfn, fileInfo in lfns.items():
        dirID = fileInfo['DirID']
        fileName = os.path.basename(lfn)
        size = fileInfo['Size']
        s_uid, s_gid = uid, gid
        ownerDict = fileInfo.get('Owner', None)
        if ownerDict:
            # Fall back on the given uid/gid when the owner cannot be resolved
            result = self.db.ugManager.getUserAndGroupID(ownerDict)
            if result['OK']:
                s_uid, s_gid = result['Value']
        insertTuples.append("(%d,%d,%d,%d,%d,'%s')" %
                            (dirID, size, s_uid, s_gid, statusID, fileName))
        # Accumulate the usage per directory, under key 0
        usage = directorySESizeDict.setdefault(dirID, {}).setdefault(0, {'Files': 0, 'Size': 0})
        usage['Size'] += size
        usage['Files'] += 1

    req = "INSERT INTO FC_Files (DirID,Size,UID,GID,Status,FileName) VALUES %s" % (
        ','.join(insertTuples))
    res = self.db._update(req, connection)
    if not res['OK']:
        return res

    # Get the fileIDs for the inserted files.
    # The keys are snapshot with list() wherever lfns is modified while looping.
    res = self._findFiles(list(lfns), ['FileID'], connection=connection)
    if not res['OK']:
        for lfn in list(lfns):
            failed[lfn] = 'Failed post insert check'
            lfns.pop(lfn)
    else:
        failed.update(res['Value']['Failed'])
        for lfn in res['Value']['Failed']:
            lfns.pop(lfn)
        for lfn, fileDict in res['Value']['Successful'].items():
            lfns[lfn]['FileID'] = fileDict['FileID']

    insertTuples = []
    toDelete = []
    for fileInfo in lfns.values():
        fileID = fileInfo['FileID']
        checksum = fileInfo['Checksum']
        checksumtype = fileInfo.get('ChecksumType', 'Adler32')
        guid = fileInfo.get('GUID', '')
        mode = fileInfo.get('Mode', self.db.umask)
        toDelete.append(fileID)
        insertTuples.append("(%d,'%s','%s','%s',UTC_TIMESTAMP(),UTC_TIMESTAMP(),%d)" %
                            (fileID, guid, checksum, checksumtype, mode))

    if insertTuples:
        req = ("INSERT INTO FC_FileInfo "
               "(FileID,GUID,Checksum,CheckSumType,CreationDate,ModificationDate,Mode) VALUES %s"
               % ','.join(insertTuples))
        # Use the same connection as for the FC_Files insertion
        res = self.db._update(req, connection)
        if not res['OK']:
            # Roll back the FC_Files rows, which have no FC_FileInfo counterpart
            self._deleteFiles(toDelete, connection=connection)
            for lfn in list(lfns):
                failed[lfn] = res['Message']
                lfns.pop(lfn)
        else:
            # Update the directory usage
            result = self._updateDirectoryUsage(directorySESizeDict, '+', connection=connection)
            if not result['OK']:
                gLogger.warn("Failed to insert FC_DirectoryUsage", result['Message'])

    return S_OK({'Successful': lfns, 'Failed': failed})
try: sandboxSize = 0 tf = tarfile.open(name=tarFileName, mode="r") for tarinfo in tf: tf.extract(tarinfo, path=destinationDir) sandboxSize += tarinfo.size tf.close() result['Value'] = sandboxSize except Exception, e: result = S_ERROR("Could not open bundle: %s" % str(e)) try: os.unlink(tarFileName) os.rmdir(tmpSBDir) except Exception, e: gLogger.warn("Could not remove temporary dir %s: %s" % (tmpSBDir, str(e))) return result ############## # Jobs def getSandboxesForJob(self, jobId): return self.__getSandboxesForEntity("Job:%s" % jobId) def assignSandboxesToJob(self, jobId, sbList, ownerName="", ownerGroup="", eSetup=""):
def getObjects(self, modulePath, reFilter=None, parentClass=None, recurse=False):
    """Search for modules under a certain path and load the objects named after them.

    Each module found is expected to hold an object with the same name as the
    module itself; that object is what gets returned.

    :param modulePath: import string needed to access the parent module. Root
                       modules will be included automatically (like DIRAC).
                       For instance "ConfigurationSystem.Service"
    :param reFilter: regular expression (string or compiled) to filter what to
                     load, matched against the module name. For instance ".*Handler"
    :param parentClass: class object from which the loaded objects have to
                        inherit. For instance RequestHandler
    :param recurse: if True, sub-packages are searched too
    :return: S_OK with a dictionary {"<modulePath>.<moduleName>": object}
             (ordered when OrderedDict is available), or the failed import result
    """
    if hasattr(collections, 'OrderedDict'):
        modules = collections.OrderedDict()
    else:
        modules = {}

    if isinstance(reFilter, types.StringTypes):
        reFilter = re.compile(reFilter)

    for rootModule in self.__rootModules:
        if rootModule:
            impPath = "%s.%s" % (rootModule, modulePath)
        else:
            impPath = modulePath
        gLogger.debug("Trying to load %s" % impPath)

        result = self.__recurseImport(impPath)
        if not result['OK']:
            return result
        if not result['Value']:
            continue
        parentModule = result['Value']
        fsPath = parentModule.__path__[0]
        gLogger.verbose("Loaded module %s at %s" % (impPath, fsPath))

        for _modLoader, modName, isPkg in pkgutil.walk_packages(parentModule.__path__):
            if reFilter and not reFilter.match(modName):
                continue
            if isPkg:
                if recurse:
                    result = self.getObjects("%s.%s" % (modulePath, modName), reFilter=reFilter,
                                             parentClass=parentClass, recurse=recurse)
                    if not result['OK']:
                        return result
                    modules.update(result['Value'])
                continue
            modKeyName = "%s.%s" % (modulePath, modName)
            # The first root module providing a given module wins
            if modKeyName in modules:
                continue
            fullName = "%s.%s" % (impPath, modName)
            result = self.__recurseImport(modName, parentModule=parentModule, fullName=fullName)
            if not result['OK']:
                return result
            if not result['Value']:
                continue
            modObj = result['Value']

            try:
                modClass = getattr(modObj, modName)
            except AttributeError:
                gLogger.warn("%s does not contain a %s object" % (fullName, modName))
                continue

            if parentClass:
                try:
                    isChild = issubclass(modClass, parentClass)
                except TypeError:
                    # issubclass() refuses non-class arguments: skip the object
                    # instead of aborting the whole search
                    gLogger.warn("Could not check %s.%s against the parent class" %
                                 (fullName, modName))
                    continue
                if not isChild:
                    continue

            # Huge success!
            modules[modKeyName] = modClass

    return S_OK(modules)
def _getConfigStorageProtocolDetails(self, storageName, protocolSection,
                                     seConfigPath=SE_CONFIG_PATH, checkAccess=True):
    """Parse the contents of the protocol block.

    As a side effect, the plug-in name is appended to ``self.remotePlugins`` or
    ``self.localPlugins`` according to the 'Access' option of the section.

    :param storageName: is the storage section to check in the CS
    :param protocolSection: name of the protocol section to find information
    :param seConfigPath: the path of the storage section.
                         It can be /Resources/StorageElements or StorageElementBases
    :param checkAccess: if not set, don't complain if "Access" is not in the section
    :return: S_OK with the dictionary of the protocol options, or S_ERROR when
             the options of the section cannot be read
    """
    # First obtain the options that are available
    protocolConfigPath = cfgPath(seConfigPath, storageName, protocolSection)
    res = gConfig.getOptions(protocolConfigPath)
    if not res["OK"]:
        errStr = "StorageFactory.__getProtocolDetails: Failed to get protocol options."
        gLogger.error(errStr, "%s: %s" % (storageName, protocolSection))
        return S_ERROR(errStr)
    options = res["Value"]

    # We must have certain values internally even if not supplied in CS
    protocolDict = {
        "Access": "",
        "Host": "",
        "Path": "",
        "Port": "",
        "Protocol": "",
        "SpaceToken": "",
        "WSUrl": "",
    }
    for option in options:
        protocolDict[option] = gConfig.getValue(cfgPath(protocolConfigPath, option), "")

    # Evaluate the base path taking into account possible VO specific setting
    if self.vo:
        result = gConfig.getOptionsDict(cfgPath(protocolConfigPath, "VOPath"))
        voPath = ""
        if result["OK"]:
            voPath = result["Value"].get(self.vo, "")
        if voPath:
            protocolDict["Path"] = voPath

    # Now update the local and remote protocol lists.
    # A warning will be given if the Access option is not set and the plugin
    # is not already in remote or local.
    plugin = protocolDict.get("PluginName", protocolSection)
    access = protocolDict["Access"].lower()
    if access == "remote":
        self.remotePlugins.append(plugin)
    elif access == "local":
        self.localPlugins.append(plugin)
    # If it is a derived SE, this is normal, no warning
    elif checkAccess and protocolSection not in self.protocols:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which would embed the source indentation in the message
        errStr = ("StorageFactory.__getProtocolDetails: The 'Access' option "
                  "for %s:%s is neither 'local' or 'remote'." % (storageName, protocolSection))
        gLogger.warn(errStr)

    return S_OK(protocolDict)