def __submit( self, site, CE, vo ):
  """
  Build the test job and submit it using the proxy obtained for the given VO.

  :param site: destination site name; only applied when no CE is given
  :param CE: destination computing element; when set it is used instead of ``site``
  :param vo: VO name handed to ``BESUtils.getProxyByVO`` to select the proxy
  :return: the result of ``self.dirac.submit( job )``, or the failed result
           of ``BESUtils.getProxyByVO`` when no proxy could be obtained
  """
  job = Job()
  job.setName( self.testType )
  job.setJobGroup( 'CE-Test' )
  job.setExecutable( self.executable )
  job.setInputSandbox( '%s/%s' % ( self.__scriptPath, self.executable ) )
  if site and not CE:
    job.setDestination( site )
  if CE:
    job.setDestinationCE( CE )

  # os.environ is shared by the whole process, so the temporary proxy swap
  # is done while holding LOCK. try/finally guarantees that the lock is
  # released and the previous proxy restored even if a call below raises.
  LOCK.acquire()
  try:
    proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
    if not proxyPath[ 'OK' ]:
      return proxyPath

    oldProxy = os.environ.get( 'X509_USER_PROXY' )
    os.environ[ 'X509_USER_PROXY' ] = proxyPath[ 'Value' ]
    try:
      return self.dirac.submit( job )
    finally:
      if oldProxy is None:
        os.environ.pop( 'X509_USER_PROXY', None )
      else:
        os.environ[ 'X509_USER_PROXY' ] = oldProxy
  finally:
    LOCK.release()
def __getJobOutput( self, jobID, vo ):
  """
  Query the job status and, once the job is 'Done' or 'Failed', download its
  output sandbox and read the test log.

  :param jobID: integer job ID (it is formatted with ``%d`` into the log path)
  :param vo: VO name handed to ``BESUtils.getProxyByVO`` to select the proxy
  :return: a failed result from ``self.dirac.status`` / ``getProxyByVO``, or
           an S_OK structure carrying an extra 'Status' key with the job
           status. Its 'Value' is empty while the job is not final, otherwise
           ``{ 'Download' : bool, 'Log' : str }``.
  :raises IOError: when the downloaded log file cannot be read

  NOTE(review): the previously visible body ended without returning ``ret``
  and had no branch for non-final statuses. The return shape here follows
  what getTestResult reads (res[ 'OK' ], res[ 'Value' ], res[ 'Status' ]);
  confirm against the upstream version.
  """
  status = self.dirac.status( jobID )
  if not status[ 'OK' ]:
    return status
  status = status[ 'Value' ][ jobID ][ 'Status' ]

  if status not in ( 'Done', 'Failed' ):
    # Job still in progress: nothing to download yet.
    ret = S_OK()
    ret[ 'Status' ] = status
    return ret

  # os.environ is shared by the whole process, so the temporary proxy swap
  # is done while holding LOCK. try/finally guarantees that the lock is
  # released and the previous proxy restored even if a call below raises.
  LOCK.acquire()
  try:
    proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
    if not proxyPath[ 'OK' ]:
      return proxyPath

    oldProxy = os.environ.get( 'X509_USER_PROXY' )
    os.environ[ 'X509_USER_PROXY' ] = proxyPath[ 'Value' ]
    try:
      outputRes = self.dirac.getOutputSandbox( jobID, self.__logPath )
    finally:
      if oldProxy is None:
        os.environ.pop( 'X509_USER_PROXY', None )
      else:
        os.environ[ 'X509_USER_PROXY' ] = oldProxy
  finally:
    LOCK.release()

  if not outputRes[ 'OK' ]:
    ret = S_OK( { 'Download' : False, 'Log' : outputRes[ 'Message' ] } )
  else:
    # An unreadable log still surfaces as IOError, but now keeps the
    # original error message, and the file handle is always closed.
    with open( '%s/%d/Script1_CodeOutput.log' % ( self.__logPath, jobID ), 'r' ) as logfile:
      log = logfile.read()
    os.system( 'rm -rf %s/%d' % ( self.__logPath, jobID ) )
    ret = S_OK( { 'Download' : True, 'Log' : log } )

  ret[ 'Status' ] = status
  return ret
def getTestResult( self, elementName, vo, jobID, submissionTime ):
  """
  Download the output sandbox and judge the test status from the log file.

  :param elementName: name of the tested element; names whose first
                      dot-separated field is 'CLOUD' are used as the site
                      name, others are mapped through ``BESUtils.getSiteForCE``
  :param vo: VO name forwarded to the output download
  :param jobID: ID of the submitted test job
  :param submissionTime: datetime of submission, compared against utcnow()
  :return: a failed result from a sub-call; ``S_OK()`` with no value while the
           job is still running within the timeout; otherwise ``S_OK( resDict )``
           with keys 'CompletionTime', 'Status', 'Log' and 'ApplicationTime'
  """
  res = self.__getJobOutput( jobID, vo )
  if not res[ 'OK' ]:
    return res
  output = res[ 'Value' ]
  status = res[ 'Status' ]

  resDict = { 'CompletionTime' : None,
              'Status' : None,
              'Log' : None,
              'ApplicationTime' : None }
  utcNow = datetime.utcnow().replace( microsecond = 0 )

  if output:
    # The job reached a final state and a download was attempted.
    resDict[ 'CompletionTime' ] = utcNow
    log = output[ 'Log' ]
    if not output[ 'Download' ]:
      resDict[ 'Status' ] = 'Unknown'
      resDict[ 'Log' ] = 'Fail to download log file for job %s: %s' % ( jobID, log )
    else:
      resDict[ 'Log' ] = log
      resDict[ 'Status' ] = self._judge( log )
      # Key was previously misspelled 'AppliactionTime', which left
      # 'ApplicationTime' at None and added a stray key.
      resDict[ 'ApplicationTime' ] = self.__getAppRunningTime( log )
    return S_OK( resDict )

  if utcNow - submissionTime < timedelta( seconds = self.timeout ):
    # Not finished and not yet timed out: report "no result yet".
    return S_OK()

  # Timed out: use the site's job summary to tell a bad site from a busy one.
  if elementName.split( '.' )[ 0 ] == 'CLOUD':
    site = elementName
  else:
    site = BESUtils.getSiteForCE( elementName )

  jobCount = self.wmsAdmin.getSiteSummaryWeb( { 'Site' : site }, [], 0, 0 )
  if not jobCount[ 'OK' ]:
    return jobCount
  params = jobCount[ 'Value' ][ 'ParameterNames' ]
  records = jobCount[ 'Value' ][ 'Records' ]
  if records:
    row = records[ 0 ]
    run = row[ params.index( 'Running' ) ]
    done = row[ params.index( 'Done' ) ]
  else:
    # No summary record for this site: count it as no running/done jobs
    # instead of failing with IndexError.
    run = 0
    done = 0

  if status == 'Waiting' and run == 0 and done == 0:
    resDict[ 'Status' ] = 'Bad'
    resDict[ 'Log' ] = 'The test job is waiting for %d seconds, but no running and done jobs at this site.' % self.timeout
  elif run != 0:
    resDict[ 'Status' ] = 'Busy'
    resDict[ 'Log' ] = 'Site %s is too busy to execute this test job, job status is %s' % ( site, status )
  else:
    resDict[ 'Status' ] = 'Unknown'
    resDict[ 'Log' ] = 'Test did not complete within the timeout of %d seconds, job status is %s' % ( self.timeout, status )

  # The test job is abandoned after the timeout, so remove it from the WMS.
  self.dirac.kill( jobID )
  return S_OK( resDict )
def export_getSiteVO(self, siteName):
  """
  Look up the VO information of a site.

  :param siteName: site name forwarded to ``BESUtils.getSiteVO``
  :return: S_OK wrapping whatever ``BESUtils.getSiteVO`` returned
  """
  gLogger.info('getSiteVO')
  return S_OK( BESUtils.getSiteVO( siteName ) )
def doTest( self, elementDict ):
  """
  Test upload and download for the specified SE.

  Runs the external test script with the proxy of the element's VO and
  turns its exit status and output into a test result.

  :param elementDict: dict providing 'ElementName' (the SE name) and 'VO'
  :return: the failed result of ``BESUtils.getProxyByVO`` when no proxy is
           available, otherwise ``S_OK( { 'Result' : {...}, 'Finish' : True } )``
           where 'Result' holds 'Status', 'Log', 'SubmissionTime',
           'CompletionTime' and 'ApplicationTime' (seconds)
  """
  elementName = elementDict[ 'ElementName' ]
  vo = elementDict[ 'VO' ]

  # Create the local payload file on first use; 'with' closes the handle
  # even if the write fails.
  testFilePath = self.__localPath + self.__testFile
  if not os.path.isfile( testFilePath ):
    with open( testFilePath, 'w' ) as testFile:
      testFile.write( 'hello' )

  status = 'OK'
  log = ''
  lfnPath = self.__lfnPath.format(vo=vo) + elementName + '-' + self.__testFile
  submissionTime = datetime.utcnow().replace( microsecond = 0 )

  proxyPath = BESUtils.getProxyByVO( 'zhangxm', vo )
  if not proxyPath[ 'OK' ]:
    gLogger.error('Can not get proxy for VO %s' % vo)
    return proxyPath
  proxyPath = proxyPath[ 'Value' ]

  # The proxy is passed through a private copy of the environment, so the
  # environment of this process is left untouched.
  env_test = os.environ.copy()
  env_test[ 'X509_USER_PROXY' ] = proxyPath
  cmd = [ os.path.join(self.__scriptPath, self.__scriptName),
          '-o', '/DIRAC/Security/UseServerCertificate=no',
          lfnPath, testFilePath, elementName ]
  result = systemCall(300, cmd, env=env_test)
  # Previously a bare 'print result'; routed through the logger instead.
  gLogger.debug( 'Result of %s for %s: %s' % ( self.__scriptName, elementName, result ) )

  if not result['OK']:
    status = 'Bad'
    log += 'Call %s failed: %s' % (self.__scriptName, result['Message'])
  elif result['Value'][0] != 0:
    status = 'Bad'
    log += '%s exit with error %s:\n%s' % (self.__scriptName, result['Value'][0], result['Value'][1])
  else:
    log += '%s exit successfully:\n%s' % (self.__scriptName, result['Value'][1])

  completionTime = datetime.utcnow().replace( microsecond = 0 )
  applicationTime = ( completionTime - submissionTime ).total_seconds()
  result = { 'Result' : { 'Status' : status,
                          'Log' : log,
                          'SubmissionTime' : submissionTime,
                          'CompletionTime' : completionTime,
                          'ApplicationTime' : applicationTime },
             'Finish' : True }

  # Remove the per-element local file if it exists; the shared payload
  # file (testFilePath) is deliberately kept for later runs.
  localFile = self.__localPath + elementName +'-' + self.__testFile
  if os.path.isfile( localFile ):
    os.remove( localFile )

  return S_OK( result )
def execute(self):
  """
  The main method of the agent.

  Collects the elements to test (CEs / CLOUD sites of the active sites and
  the SEs of the SE-USER group), runs ``self._execute`` for each element in
  its own thread, then evaluates the status of every active site per VO.

  :return: the failed site mask result, otherwise S_OK()
  """
  from BESDIRAC.ResourceStatusSystem.SAM.SAMTest import TestConfiguration
  self.tests = TestConfiguration.TESTS
  self.__loadTestObj()
  self.testExecutor = TestExecutor( self.tests, self.apis )
  self.statusEvaluator = StatusEvaluator( self.apis )

  elements = []
  sitesCEs = {}

  # CE tests
  noTestSites = [ site.strip()
                  for site in self.am_getOption( 'noTestSite', '' ).split( ',' )
                  if site.strip() ]
  diracAdmin = DiracAdmin()
  activeSites = diracAdmin.getSiteMask()
  if not activeSites[ 'OK' ]:
    return activeSites
  activeSites = [ site for site in activeSites[ 'Value' ] if site not in noTestSites ]
  # Interpolate explicitly, like every other log call in this method.
  gLogger.info( 'Active sites: %s' % ( activeSites, ) )

  for siteName in activeSites:
    domain = siteName.split('.')[ 0 ]
    vos = BESUtils.getSiteVO( siteName )
    if 'CLOUD' != domain:
      siteCEs = CSHelpers.getSiteComputingElements( siteName )
      sitesCEs[ siteName ] = siteCEs
      for ce in siteCEs:
        elements.append( { 'ElementName' : ce,
                           'ElementType' : 'ComputingElement',
                           'VO' : vos } )
        gLogger.debug("List of elements: %s" % ce)
    else:
      # A CLOUD site is tested as a single element named after the site.
      sitesCEs[ siteName ] = [ siteName ]
      elements.append( { 'ElementName' : siteName,
                         'ElementType' : 'CLOUD',
                         'VO' : vos } )

  # SE tests
  # The option may be missing (None) and entries may be separated by ','
  # with or without a following space, so split on ',' and strip.
  ses = gConfig.getValue( 'Resources/StorageElementGroups/SE-USER' ) or ''
  seNames = [ name.strip() for name in ses.split( ',' ) if name.strip() ]
  for se in seNames:
    seSites = BESUtils.getSitesForSE( se )
    for seSite in seSites:
      gLogger.debug( 'Site for SE %s: %s' % (se, seSite) )
      if seSite not in activeSites:
        continue
      vos = BESUtils.getSiteVO( seSite )
      gLogger.debug( 'vos for SE %s under site %s: %s' % (se, seSite, vos) )
      if len(vos) == 0:
        continue
      vo = vos[0]
      elements.append( { 'ElementName' : se,
                         'ElementType' : 'StorageElement',
                         'VO' : vo } )
      gLogger.info( 'VO for SE %s: %s' % ( se, vo ) )
      # One active site with a VO is enough to schedule the SE once.
      break

  lastCheckTime = datetime.utcnow().replace(microsecond = 0)
  self.elementsStatus = {}

  # Run every element test in its own thread and wait for all of them.
  threads = []
  for elementDict in elements:
    t = threading.Thread( target = self._execute, args = ( elementDict, ) )
    threads.append( t )
    t.start()
  for thread in threads:
    thread.join()

  for siteName in activeSites:
    # Only the first SE of the site contributes to the site status.
    seList = CSHelpers.getSiteStorageElements( siteName )
    se = ''
    if [] != seList:
      se = seList[ 0 ]
    try:
      seStatus = self.elementsStatus[ se ][ 'all' ]
    except KeyError:
      seStatus = None

    # Group the CE statuses of this site by VO.
    voStatus = { 'all' : [] }
    for ce in sitesCEs[ siteName ]:
      if ce not in self.elementsStatus:
        continue
      for vo, status in self.elementsStatus[ ce ].items():
        if vo not in voStatus:
          voStatus[ vo ] = []
        voStatus[ vo ].append( status )

    for vo, ceStatusList in voStatus.items():
      if ceStatusList == [] and seStatus is None:
        continue
      res = self.statusEvaluator.evaluateSiteStatus( siteName, ceStatusList, seStatus,
                                                     vo = vo, lastCheckTime = lastCheckTime)
      if not res[ 'OK' ]:
        gLogger.error( 'StatusEvaluator.evaluateSiteStatus: %s' % res[ 'Message' ] )
        break

  return S_OK()
def execute(self):
  """
  The main method of the agent.

  Collects the elements to test (the SEs of the SE-USER group and the CEs /
  CLOUD sites from the WMS site mask), runs ``self._execute`` for each
  element in its own thread, then evaluates the status of every site per VO.

  :return: the failed site mask result, otherwise S_OK()
  """
  elements = []
  sitesCEs = {}

  # The option may be missing (None) and entries may be separated by ','
  # with or without a following space, so split on ',' and strip.
  ses = gConfig.getValue( 'Resources/StorageElementGroups/SE-USER' ) or ''
  for se in [ name.strip() for name in ses.split( ',' ) if name.strip() ]:
    elements.append( { 'ElementName' : se,
                       'ElementType' : 'StorageElement' } )

  wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
  activeSites = wmsAdmin.getSiteMask()
  if not activeSites[ 'OK' ]:
    return activeSites
  activeSites = activeSites[ 'Value' ]

  for siteName in activeSites:
    domain = siteName.split('.')[ 0 ]
    vos = BESUtils.getSiteVO( siteName )
    if 'CLOUD' != domain:
      siteCEs = CSHelpers.getSiteComputingElements( siteName )
      sitesCEs[ siteName ] = siteCEs
      for ce in siteCEs:
        elements.append( { 'ElementName' : ce,
                           'ElementType' : 'ComputingElement',
                           'VO' : vos } )
    else:
      # A CLOUD site is tested as a single element named after the site.
      sitesCEs[ siteName ] = [ siteName ]
      elements.append( { 'ElementName' : siteName,
                         'ElementType' : 'CLOUD',
                         'VO' : vos } )

  lastCheckTime = datetime.utcnow().replace(microsecond = 0)
  self.elementsStatus = {}

  # Run every element test in its own thread and wait for all of them.
  threads = []
  for elementDict in elements:
    t = threading.Thread( target = self._execute, args = ( elementDict, ) )
    threads.append( t )
    t.start()
  for thread in threads:
    thread.join()

  for siteName in activeSites:
    # Only the first SE of the site contributes to the site status.
    seList = CSHelpers.getSiteStorageElements( siteName )
    se = ''
    if [] != seList:
      se = seList[ 0 ]
    try:
      seStatus = self.elementsStatus[ se ][ 'all' ]
    except KeyError:
      seStatus = None

    # Group the CE statuses of this site by VO.
    voStatus = { 'all' : [] }
    for ce in sitesCEs[ siteName ]:
      if ce not in self.elementsStatus:
        continue
      for vo, status in self.elementsStatus[ ce ].items():
        if vo not in voStatus:
          voStatus[ vo ] = []
        voStatus[ vo ].append( status )

    for vo, ceStatusList in voStatus.items():
      if ceStatusList == [] and seStatus is None:
        continue
      res = self.statusEvaluator.evaluateSiteStatus( siteName, ceStatusList, seStatus,
                                                     vo = vo, lastCheckTime = lastCheckTime)
      if not res[ 'OK' ]:
        gLogger.error( 'StatusEvaluator.evaluateSiteStatus: %s' % res[ 'Message' ] )
        break

  return S_OK()