def main(backGround=False): _logger.debug('starting ...') # register signal handler signal.signal(signal.SIGINT, catch_sig) signal.signal(signal.SIGHUP, catch_sig) signal.signal(signal.SIGTERM, catch_sig) signal.signal(signal.SIGALRM, catch_sig) signal.alarm(overallTimeout) # forking pid = os.fork() if pid != 0: # watch child process os.wait() time.sleep(1) else: # main loop from taskbuffer.TaskBuffer import taskBuffer # initialize cx_Oracle using dummy connection from taskbuffer.Initializer import initializer initializer.init() # instantiate TB taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1) # instantiate sitemapper siteMapper = SiteMapper(taskBuffer) # ActiveMQ params clientid = 'PANDA-' + socket.getfqdn() queue = '/queue/Consumer.test1.poc.pocMSG' ssl_opts = { 'use_ssl': True, 'ssl_cert_file': '%s/hostcert.pem' % panda_config.certdir, 'ssl_key_file': '%s/hostkey.pem' % panda_config.certdir } # resolve multiple brokers brokerList = socket.gethostbyname_ex('gridmsg007.cern.ch')[-1] # set listener for tmpBroker in brokerList: try: _logger.debug('setting listener on %s' % tmpBroker) conn = stomp.Connection(host_and_ports=[(tmpBroker, 6162)], **ssl_opts) conn.set_listener( 'GenCallbackConsumer', GenCallbackConsumer(conn, taskBuffer, siteMapper)) conn.start() conn.connect(headers={'client-id': clientid}) conn.subscribe(destination=queue, ack='client-individual') #,headers = {'selector':"cbtype='FileDoneMessage'"}) if not conn.is_connected(): _logger.error("connection failure to %s" % tmpBroker) except: errtype, errvalue = sys.exc_info()[:2] _logger.error("failed to set listener on %s : %s %s" % (tmpBroker, errtype, errvalue)) catch_sig(None, None)
def main(backGround=False):
    """Daemon entry point: fork, then keep ActiveMQ file-callback listeners alive.

    The parent only waits on the child. The child checks the service
    certificate, initializes the task buffer, creates one stomp connection per
    broker behind atlas-mb.cern.ch, and then loops forever re-attaching a
    FileCallbackListener to any connection that has dropped.

    :param backGround: accepted for interface compatibility; not used in this body
    """
    _logger.debug('starting ...')
    # register signal handler
    signal.signal(signal.SIGINT, catch_sig)
    signal.signal(signal.SIGHUP, catch_sig)
    signal.signal(signal.SIGTERM, catch_sig)
    signal.signal(signal.SIGALRM, catch_sig)
    # SIGALRM after overallTimeout acts as a watchdog via catch_sig
    signal.alarm(overallTimeout)
    # forking
    pid = os.fork()
    if pid != 0:
        # watch child process
        os.wait()
        time.sleep(1)
    else:
        # main loop
        from taskbuffer.TaskBuffer import taskBuffer
        # check certificate; a bad certificate is logged but not fatal
        certName = '%s/pandasv1_usercert.pem' % panda_config.certdir
        keyName = '%s/pandasv1_userkey.pem' % panda_config.certdir
        _logger.debug('checking certificate {0}'.format(certName))
        certOK, certMsg = DataServiceUtils.checkCertificate(certName)
        if not certOK:
            _logger.error('bad certificate : {0}'.format(certMsg))
        # initialize cx_Oracle using dummy connection
        from taskbuffer.Initializer import initializer
        initializer.init()
        # instantiate TB
        taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
        # instantiate sitemapper
        siteMapper = SiteMapper(taskBuffer)
        # ActiveMQ params
        queue = '/queue/Consumer.PANDA.atlas.ddm.siteservices'
        ssl_opts = {'use_ssl': True,
                    'ssl_version': ssl.PROTOCOL_TLSv1,
                    'ssl_cert_file': certName,
                    'ssl_key_file': keyName}
        # resolve multiple brokers behind the DNS alias
        brokerList = socket.gethostbyname_ex('atlas-mb.cern.ch')[-1]
        # set listener
        connList = []
        for tmpBroker in brokerList:
            try:
                # NOTE(review): clientid/subscription_id are rebound on each
                # iteration; after this loop only the values from the LAST
                # broker survive and are reused for every connection below —
                # looks unintentional, confirm against the upstream service
                clientid = 'PANDA-' + socket.getfqdn() + '-' + tmpBroker
                subscription_id = 'panda-server-consumer-' + socket.getfqdn()
                _logger.debug('setting listener %s' % clientid)
                conn = stomp.Connection(host_and_ports=[(tmpBroker, 61023)], **ssl_opts)
                connList.append(conn)
            except:
                errtype, errvalue = sys.exc_info()[:2]
                _logger.error("failed to connect to %s : %s %s" % (tmpBroker, errtype, errvalue))
                catch_sig(None, None)
        # supervision loop: reconnect any dropped connection every 5 seconds
        while True:
            for conn in connList:
                try:
                    if not conn.is_connected():
                        conn.set_listener('FileCallbackListener',
                                          FileCallbackListener(conn, taskBuffer, siteMapper,
                                                               subscription_id))
                        conn.start()
                        conn.connect(headers={'client-id': clientid})
                        conn.subscribe(destination=queue, id=subscription_id, ack='client-individual')
                        _logger.debug('listener %s is up and running' % clientid)
                except:
                    # NOTE(review): tmpBroker here is the leftover loop variable
                    # from the for-loop above, not the broker of this conn
                    errtype, errvalue = sys.exc_info()[:2]
                    _logger.error("failed to set listener on %s : %s %s" % (tmpBroker, errtype, errvalue))
                    catch_sig(None, None)
            time.sleep(5)
def run(self):
    """Finalize transfer status for a dataset callback or for a single job.

    Two entry modes:
      * callback mode (self.job is None): self.dataset/self.site are set by a
        DDM callback; update the dataset's transfer-status bitmap and, once all
        required tokens are complete, flag the dataset for T2 cleanup and
        collect the affected PandaIDs;
      * direct mode: operate on self.job only.
    For each job still in 'transferring' whose output/log files are all
    ready/failed/nooutput, a POOLFILECATALOG XML snapshot is written to
    panda_config.logdir for downstream processing.
    """
    # start
    try:
        byCallback = False
        if self.job == None:
            byCallback = True
            _logger.debug("start: %s" % self.dataset.name)
            _logger.debug("callback from %s" % self.site)
            # FIXME when callback from BNLPANDA disappeared
            if self.site == 'BNLPANDA':
                self.site = 'BNL-OSG2_ATLASMCDISK'
            # instantiate site mapper
            siteMapper = SiteMapper(self.taskBuffer)
            # get computingSite/destinationSE
            computingSite, destinationSE = self.taskBuffer.getDestSE(self.dataset.name)
            if destinationSE == None:
                # try to get computingSite/destinationSE from ARCH to delete sub
                # even if no active jobs left
                computingSite, destinationSE = self.taskBuffer.getDestSE(self.dataset.name, True)
                if destinationSE == None:
                    _logger.error("cannot get source/destination for %s" % self.dataset.name)
                    _logger.debug("end: %s" % self.dataset.name)
                    return
            _logger.debug("src: %s" % computingSite)
            _logger.debug("dst: %s" % destinationSE)
            # get corresponding token: match the callback site against the
            # destination site's space-token -> DDM-endpoint map
            tmpSrcSiteSpec = siteMapper.getSite(computingSite)
            tmpDstSiteSpec = siteMapper.getSite(destinationSE)
            _logger.debug(tmpDstSiteSpec.setokens)
            destToken = None
            for tmpToken, tmpDdmId in tmpDstSiteSpec.setokens.iteritems():
                if self.site == tmpDdmId:
                    destToken = tmpToken
                    break
            _logger.debug("use Token=%s" % destToken)
            # get required tokens (comma-separated list)
            reqTokens = self.taskBuffer.getDestTokens(self.dataset.name)
            if reqTokens == None:
                _logger.error("cannot get required token for %s" % self.dataset.name)
                _logger.debug("end: %s" % self.dataset.name)
                return
            _logger.debug("req Token=%s" % reqTokens)
            # make bitmap for the token: bit position = index of destToken in
            # the required-token list
            bitMap = 1
            if len(reqTokens.split(',')) > 1:
                for tmpReqToken in reqTokens.split(','):
                    if tmpReqToken == destToken:
                        break
                    # shift one bit
                    bitMap <<= 1
            # completed bitmap: all required-token bits set
            compBitMap = (1 << len(reqTokens.split(','))) - 1
            # ignore the lowest bit for T1, file on DISK is already there
            if tmpSrcSiteSpec.ddm == tmpDstSiteSpec.ddm:
                compBitMap = compBitMap & 0xFFFE
            # update bitmap in DB (OR-ed in by the task buffer)
            updatedBitMap = self.taskBuffer.updateTransferStatus(self.dataset.name, bitMap)
            _logger.debug("transfer status:%s - comp:%s - bit:%s" % (hex(updatedBitMap),
                                                                     hex(compBitMap), hex(bitMap)))
            # update output files only when every required token has reported
            if (updatedBitMap & compBitMap) == compBitMap:
                ids = self.taskBuffer.updateOutFilesReturnPandaIDs(self.dataset.name)
                # set flag for T2 cleanup
                self.dataset.status = 'cleanup'
                self.taskBuffer.updateDatasets([self.dataset])
            else:
                _logger.debug("end: %s" % self.dataset.name)
                return
        else:
            _logger.debug("start: %s" % self.job.PandaID)
            # update input files
            ids = [self.job.PandaID]
        _logger.debug("IDs: %s" % ids)
        if len(ids) != 0:
            # get job objects (callback mode fetches them by PandaID)
            if self.job == None:
                jobs = self.taskBuffer.peekJobs(ids, fromDefined=False,
                                                fromArchived=False, fromWaiting=False)
            else:
                jobs = [self.job]
            # loop over all jobs
            for job in jobs:
                if job == None:
                    continue
                _logger.debug("Job: %s" % job.PandaID)
                if job.jobStatus == 'transferring':
                    jobReady = True
                    failedFiles = []
                    noOutFiles = []
                    # check file status: any output/log file not in a terminal
                    # state keeps the job waiting
                    for file in job.Files:
                        if file.type == 'output' or file.type == 'log':
                            if file.status == 'failed':
                                failedFiles.append(file.lfn)
                            elif file.status == 'nooutput':
                                noOutFiles.append(file.lfn)
                            elif file.status != 'ready':
                                _logger.debug("Job: %s file:%s %s != ready" % (job.PandaID,
                                                                               file.lfn, file.status))
                                jobReady = False
                                break
                    # finish job
                    if jobReady:
                        if byCallback:
                            _logger.debug("Job: %s all files ready" % job.PandaID)
                        else:
                            _logger.debug("Job: %s all files checked with catalog" % job.PandaID)
                        # create POOLFILECATALOG XML listing the good files
                        try:
                            import xml.dom.minidom
                            dom = xml.dom.minidom.getDOMImplementation()
                            doc = dom.createDocument(None, 'xml', None)
                            topNode = doc.createElement("POOLFILECATALOG")
                            for file in job.Files:
                                if file.type in ['output', 'log']:
                                    # skip failed or no-output files
                                    if file.lfn in failedFiles + noOutFiles:
                                        continue
                                    # File
                                    fileNode = doc.createElement("File")
                                    fileNode.setAttribute("ID", file.GUID)
                                    # LFN
                                    logNode = doc.createElement("logical")
                                    lfnNode = doc.createElement("lfn")
                                    lfnNode.setAttribute('name', file.lfn)
                                    # metadata
                                    fsizeNode = doc.createElement("metadata")
                                    fsizeNode.setAttribute("att_name", "fsize")
                                    fsizeNode.setAttribute("att_value", str(file.fsize))
                                    # checksum: 'ad:' prefix means adler32, else md5
                                    if file.checksum.startswith('ad:'):
                                        # adler32
                                        chksumNode = doc.createElement("metadata")
                                        chksumNode.setAttribute("att_name", "adler32")
                                        chksumNode.setAttribute("att_value",
                                                                re.sub('^ad:', '', file.checksum))
                                    else:
                                        # md5sum
                                        chksumNode = doc.createElement("metadata")
                                        chksumNode.setAttribute("att_name", "md5sum")
                                        chksumNode.setAttribute("att_value",
                                                                re.sub('^md5:', '', file.checksum))
                                    # append nodes
                                    logNode.appendChild(lfnNode)
                                    fileNode.appendChild(logNode)
                                    fileNode.appendChild(fsizeNode)
                                    fileNode.appendChild(chksumNode)
                                    topNode.appendChild(fileNode)
                            # status in file name signals overall outcome
                            if failedFiles == []:
                                statusFileName = 'finished'
                            else:
                                statusFileName = 'failed'
                            # write to file with a unique suffix
                            xmlFile = '%s/%s_%s_%s' % (panda_config.logdir, job.PandaID,
                                                       statusFileName,
                                                       commands.getoutput('uuidgen'))
                            oXML = open(xmlFile, "w")
                            oXML.write(topNode.toxml())
                            oXML.close()
                        except:
                            type, value, traceBack = sys.exc_info()
                            _logger.error("%s : %s %s" % (job.PandaID, type, value))
                _logger.debug("Job: %s status: %s" % (job.PandaID, job.jobStatus))
        # end
        if self.job == None:
            _logger.debug("end: %s" % self.dataset.name)
        else:
            _logger.debug("end: %s" % self.job.PandaID)
    except:
        type, value, traceBack = sys.exc_info()
        _logger.error("run() : %s %s" % (type, value))
def getSiteMapper(self):
    """Create and return a fresh SiteMapper bound to this object's task buffer."""
    mapper = SiteMapper(self.taskBuffer)
    return mapper
startTime = datetime.datetime(*time.strptime(timeM.group(1),'%b %d %H:%M:%S %Y')[:6]) # kill old process if startTime < timeLimit: _logger.debug("old process : %s %s" % (pid,startTime)) _logger.debug(line) commands.getoutput('kill -9 %s' % pid) except: type, value, traceBack = sys.exc_info() _logger.error("kill process : %s %s" % (type,value)) # instantiate TB taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1) # instantiate sitemapper siteMapper = SiteMapper(taskBuffer) _memoryCheck("rebroker") # rebrokerage _logger.debug("Rebrokerage start") # get timeout value timeoutVal = taskBuffer.getConfigValue('rebroker','ANALY_TIMEOUT') if timeoutVal is None: timeoutVal = 12 _logger.debug("timeout value : {0}h".format(timeoutVal)) try: normalTimeLimit = datetime.datetime.utcnow() - datetime.timedelta(hours=timeoutVal) sortTimeLimit = datetime.datetime.utcnow() - datetime.timedelta(hours=3) sql = "SELECT jobDefinitionID,prodUserName,prodUserID,computingSite,MAX(modificationTime),jediTaskID,processingType "
# password from config import panda_config passwd = panda_config.dbpasswd # logger _logger = PandaLogger().getLogger('prioryMassage') tmpLog = LogWrapper(_logger) tmpLog.debug("================= start ==================") # instantiate TB taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1) # instantiate sitemapper siteMapper = SiteMapper(taskBuffer) # get usage breakdown usageBreakDownPerUser = {} usageBreakDownPerSite = {} workingGroupList = [] for table in ['ATLAS_PANDA.jobsActive4', 'ATLAS_PANDA.jobsArchived4']: varMap = {} varMap[':prodSourceLabel'] = 'user' if table == 'ATLAS_PANDA.jobsActive4': sql = "SELECT COUNT(*),prodUserName,jobStatus,workingGroup,computingSite FROM %s WHERE prodSourceLabel=:prodSourceLabel GROUP BY prodUserName,jobStatus,workingGroup,computingSite" % table else: # with time range for archived table varMap[':modificationTime'] = datetime.datetime.utcnow( ) - datetime.timedelta(minutes=60) sql = "SELECT COUNT(*),prodUserName,jobStatus,workingGroup,computingSite FROM %s WHERE prodSourceLabel=:prodSourceLabel AND modificationTime>:modificationTime GROUP BY prodUserName,jobStatus,workingGroup,computingSite" % table
def run(self):
    """Re-broker one analysis job set to a better site.

    Applies a long chain of eligibility guards (output container required, not
    group jobs, allowed processingType, jobsetID/metadata present, no
    --disableRebrokerage/--site/--libDS/--workingGroup options, rebrokerage
    rate limit, libDS not shared across JobIDs), then builds a per-site
    replica map of the input datasets, selects candidate ANALY_ sites holding
    complete replicas, runs the analysis brokerage over (at most 20 random)
    candidates, and finally reassigns the jobs via prepareJob/runSetUpper
    unless running in simulation mode.
    """
    try:
        # get job
        tmpJobs = self.taskBuffer.getFullJobStatus([self.rPandaID])
        if tmpJobs == [] or tmpJobs[0] == None:
            _logger.debug("cannot find job for PandaID=%s" % self.rPandaID)
            return
        self.job = tmpJobs[0]
        _logger.debug("%s start %s:%s:%s" % (self.token, self.job.jobDefinitionID,
                                             self.job.prodUserName, self.job.computingSite))
        # using output container
        if not self.job.destinationDBlock.endswith('/'):
            _logger.debug("%s ouput dataset container is required" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # FIXME: don't touch group jobs for now
        if self.job.destinationDBlock.startswith('group') and (not self.userRequest):
            _logger.debug("%s skip group jobs" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check processingType
        typesForRebro = ['pathena', 'prun', 'ganga', 'ganga-rbtest']
        if not self.job.processingType in typesForRebro:
            _logger.debug("%s skip processingType=%s not in %s" % \
                          (self.token, self.job.processingType, str(typesForRebro)))
            _logger.debug("%s end" % self.token)
            return
        # check jobsetID
        if self.job.jobsetID in [0, 'NULL', None]:
            _logger.debug("%s jobsetID is undefined" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check metadata (holds the original submission options)
        if self.job.metadata in [None, 'NULL']:
            _logger.debug("%s metadata is unavailable" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check --disableRebrokerage
        match = re.search("--disableRebrokerage", self.job.metadata)
        if match != None and (not self.simulation) and (not self.forceOpt) \
               and (not self.userRequest):
            _logger.debug("%s diabled rebrokerage" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check --site (user pinned a site explicitly)
        match = re.search("--site", self.job.metadata)
        if match != None and (not self.simulation) and (not self.forceOpt) \
               and (not self.userRequest):
            _logger.debug("%s --site is used" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check --libDS
        match = re.search("--libDS", self.job.metadata)
        if match != None:
            _logger.debug("%s --libDS is used" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check --workingGroup since it is site-specific
        match = re.search("--workingGroup", self.job.metadata)
        if match != None:
            _logger.debug("%s workingGroup is specified" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # avoid too many rebrokerage
        if not self.checkRev():
            _logger.debug("%s avoid too many rebrokerage" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check if multiple JobIDs use the same libDS; moving one would break
        # the others, so compare jobDefinitionIDs at both ends of the libDS
        if self.bPandaID != None and self.buildStatus not in ['finished', 'failed']:
            if self.minPandaIDlibDS == None or self.maxPandaIDlibDS == None:
                _logger.debug("%s max/min PandaIDs are unavailable for the libDS" % self.token)
                _logger.debug("%s end" % self.token)
                return
            tmpPandaIDsForLibDS = self.taskBuffer.getFullJobStatus([self.minPandaIDlibDS,
                                                                    self.maxPandaIDlibDS])
            if len(tmpPandaIDsForLibDS) != 2 or tmpPandaIDsForLibDS[0] == None or tmpPandaIDsForLibDS[1] == None:
                _logger.debug("%s failed to get max/min PandaIDs for the libDS" % self.token)
                _logger.debug("%s end" % self.token)
                return
            # check
            if tmpPandaIDsForLibDS[0].jobDefinitionID != tmpPandaIDsForLibDS[1].jobDefinitionID:
                _logger.debug("%s multiple JobIDs use the libDS %s:%s %s:%s" % (self.token,
                              tmpPandaIDsForLibDS[0].jobDefinitionID, self.minPandaIDlibDS,
                              tmpPandaIDsForLibDS[1].jobDefinitionID, self.maxPandaIDlibDS))
                _logger.debug("%s end" % self.token)
                return
        # check excludedSite, parsed from the --excludedSite option if unset
        if self.excludedSite == None:
            self.excludedSite = []
            match = re.search("--excludedSite( +|=)\s*(\'|\")*([^ \"\';$]+)", self.job.metadata)
            if match != None:
                self.excludedSite = match.group(3).split(',')
            # remove empty
            try:
                self.excludedSite.remove('')
            except:
                pass
        _logger.debug("%s excludedSite=%s" % (self.token, str(self.excludedSite)))
        # check cloud, parsed from the --cloud option if unset
        if self.cloud == None:
            match = re.search("--cloud( +|=)\s*(\'|\")*([^ \"\';$]+)", self.job.metadata)
            if match != None:
                self.cloud = match.group(3)
        _logger.debug("%s cloud=%s" % (self.token, self.cloud))
        # get inDS/LFNs
        status, tmpMapInDS, maxFileSize = self.taskBuffer.getInDatasetsForReBrokerage(self.jobID,
                                                                                      self.userName)
        if not status:
            # failed
            _logger.error("%s failed to get inDS/LFN from DB" % self.token)
            return
        status, inputDS = self.getListDatasetsUsedByJob(tmpMapInDS)
        if not status:
            # failed
            _logger.error("%s failed" % self.token)
            return
        # get replicas for every input dataset/container
        replicaMap = {}
        unknownSites = {}
        for tmpDS in inputDS:
            if tmpDS.endswith('/'):
                # container
                status, tmpRepMaps = self.getListDatasetReplicasInContainer(tmpDS)
            else:
                # normal dataset
                status, tmpRepMap = self.getListDatasetReplicas(tmpDS)
                tmpRepMaps = {tmpDS: tmpRepMap}
            if not status:
                # failed
                _logger.debug("%s failed" % self.token)
                return
            # make map per site, normalizing endpoint names to site aggregates
            for tmpDS, tmpRepMap in tmpRepMaps.iteritems():
                for tmpSite, tmpStat in tmpRepMap.iteritems():
                    # ignore special sites
                    if tmpSite in ['CERN-PROD_TZERO', 'CERN-PROD_DAQ', 'CERN-PROD_TMPDISK']:
                        continue
                    # ignore tape sites
                    if tmpSite.endswith('TAPE'):
                        continue
                    # keep sites with unknown replica info for a later refresh
                    if tmpStat[-1]['found'] == None:
                        if not unknownSites.has_key(tmpDS):
                            unknownSites[tmpDS] = []
                        unknownSites[tmpDS].append(tmpSite)
                    # ignore ToBeDeleted
                    if tmpStat[-1]['archived'] in ['ToBeDeleted',]:
                        continue
                    # change EOS
                    if tmpSite.startswith('CERN-PROD_EOS'):
                        tmpSite = 'CERN-PROD_EOS'
                    # change EOS TMP
                    if tmpSite.startswith('CERN-PROD_TMP'):
                        tmpSite = 'CERN-PROD_TMP'
                    # change DISK to SCRATCHDISK
                    tmpSite = re.sub('_[^_-]+DISK$', '', tmpSite)
                    # change PERF-XYZ to SCRATCHDISK
                    tmpSite = re.sub('_PERF-[^_-]+$', '', tmpSite)
                    # change PHYS-XYZ to SCRATCHDISK
                    tmpSite = re.sub('_PHYS-[^_-]+$', '', tmpSite)
                    # patch for BNLPANDA
                    if tmpSite in ['BNLPANDA']:
                        tmpSite = 'BNL-OSG2'
                    # add to map
                    if not replicaMap.has_key(tmpSite):
                        replicaMap[tmpSite] = {}
                    replicaMap[tmpSite][tmpDS] = tmpStat[-1]
        _logger.debug("%s replica map -> %s" % (self.token, str(replicaMap)))
        # refresh replica info if needed
        self.refreshReplicaInfo(unknownSites)
        # instantiate SiteMapper
        siteMapper = SiteMapper(self.taskBuffer)
        # get original DDM
        origSiteDDM = self.getAggName(siteMapper.getSite(self.job.computingSite).ddm)
        # check all datasets: a DQ2 site is a candidate only if it holds a
        # complete replica of every input dataset
        maxDQ2Sites = []
        if inputDS != []:
            # loop over all sites
            for tmpSite, tmpDsVal in replicaMap.iteritems():
                # loop over all datasets
                appendFlag = True
                for tmpOrigDS in inputDS:
                    # check completeness
                    if tmpDsVal.has_key(tmpOrigDS) and tmpDsVal[tmpOrigDS]['found'] != None and \
                           tmpDsVal[tmpOrigDS]['total'] == tmpDsVal[tmpOrigDS]['found']:
                        pass
                    else:
                        appendFlag = False
                # append
                if appendFlag:
                    if not tmpSite in maxDQ2Sites:
                        maxDQ2Sites.append(tmpSite)
        _logger.debug("%s candidate DQ2s -> %s" % (self.token, str(maxDQ2Sites)))
        if inputDS != [] and maxDQ2Sites == []:
            _logger.debug("%s no DQ2 candidate" % self.token)
        else:
            maxPandaSites = []
            # original maxinputsize
            origMaxInputSize = siteMapper.getSite(self.job.computingSite).maxinputsize
            # look for Panda siteIDs matching the candidate DQ2 sites
            for tmpSiteID, tmpSiteSpec in siteMapper.siteSpecList.iteritems():
                # use ANALY_ only
                if not tmpSiteID.startswith('ANALY_'):
                    continue
                # remove test and local
                if re.search('_test', tmpSiteID, re.I) != None:
                    continue
                if re.search('_local', tmpSiteID, re.I) != None:
                    continue
                # avoid same site
                if self.avoidSameSite and self.getAggName(tmpSiteSpec.ddm) == origSiteDDM:
                    continue
                # check DQ2 ID
                if self.cloud in [None, tmpSiteSpec.cloud] \
                       and (self.getAggName(tmpSiteSpec.ddm) in maxDQ2Sites or inputDS == []):
                    # excluded sites (patterns from --excludedSite)
                    excludedFlag = False
                    for tmpExcSite in self.excludedSite:
                        if re.search(tmpExcSite, tmpSiteID) != None:
                            excludedFlag = True
                            break
                    if excludedFlag:
                        _logger.debug("%s skip %s since excluded" % (self.token, tmpSiteID))
                        continue
                    # use online only
                    if tmpSiteSpec.status != 'online':
                        _logger.debug("%s skip %s status=%s" % (self.token, tmpSiteID,
                                                                tmpSiteSpec.status))
                        continue
                    # check maxinputsize
                    if (maxFileSize == None and origMaxInputSize > siteMapper.getSite(tmpSiteID).maxinputsize) or \
                           maxFileSize > siteMapper.getSite(tmpSiteID).maxinputsize:
                        _logger.debug("%s skip %s due to maxinputsize" % (self.token, tmpSiteID))
                        continue
                    # append
                    if not tmpSiteID in maxPandaSites:
                        maxPandaSites.append(tmpSiteID)
            # choose at most 20 sites randomly to avoid too many lookup
            random.shuffle(maxPandaSites)
            maxPandaSites = maxPandaSites[:20]
            _logger.debug("%s candidate PandaSites -> %s" % (self.token, str(maxPandaSites)))
            # no Panda siteIDs
            if maxPandaSites == []:
                _logger.debug("%s no Panda site candidate" % self.token)
            else:
                # set AtlasRelease and cmtConfig to dummy job for brokerage
                tmpJobForBrokerage = JobSpec()
                if self.job.AtlasRelease in ['NULL', None]:
                    tmpJobForBrokerage.AtlasRelease = ''
                else:
                    tmpJobForBrokerage.AtlasRelease = self.job.AtlasRelease
                # use nightlies
                matchNight = re.search('^AnalysisTransforms-.*_(rel_\d+)$', self.job.homepackage)
                if matchNight != None:
                    tmpJobForBrokerage.AtlasRelease += ':%s' % matchNight.group(1)
                # use cache
                else:
                    matchCache = re.search('^AnalysisTransforms-([^/]+)', self.job.homepackage)
                    if matchCache != None:
                        tmpJobForBrokerage.AtlasRelease = matchCache.group(1).replace('_', '-')
                if not self.job.cmtConfig in ['NULL', None]:
                    tmpJobForBrokerage.cmtConfig = self.job.cmtConfig
                # memory size
                if not self.job.minRamCount in ['NULL', None, 0]:
                    tmpJobForBrokerage.minRamCount = self.job.minRamCount
                # CPU count
                if not self.job.maxCpuCount in ['NULL', None, 0]:
                    tmpJobForBrokerage.maxCpuCount = self.job.maxCpuCount
                # run brokerage over the candidate list
                brokerage.broker.schedule([tmpJobForBrokerage], self.taskBuffer, siteMapper,
                                          forAnalysis=True, setScanSiteList=maxPandaSites,
                                          trustIS=True, reportLog=True)
                newSiteID = tmpJobForBrokerage.computingSite
                self.brokerageInfo += tmpJobForBrokerage.brokerageErrorDiag
                _logger.debug("%s runBrokerage - > %s" % (self.token, newSiteID))
                # unknown site
                if not siteMapper.checkSite(newSiteID):
                    _logger.error("%s unknown site" % self.token)
                    _logger.debug("%s failed" % self.token)
                    return
                # get new site spec
                newSiteSpec = siteMapper.getSite(newSiteID)
                # avoid repetition: brokerage picked the same storage again
                if self.getAggName(newSiteSpec.ddm) == origSiteDDM:
                    _logger.debug("%s assigned to the same site %s " % (self.token, newSiteID))
                    _logger.debug("%s end" % self.token)
                    return
                # simulation mode stops before any reassignment
                if self.simulation:
                    _logger.debug("%s end simulation" % self.token)
                    return
                # prepare jobs
                status = self.prepareJob(newSiteID, newSiteSpec)
                if status:
                    # run SetUpper
                    statusSetUp = self.runSetUpper()
                    if not statusSetUp:
                        _logger.debug("%s runSetUpper failed" % self.token)
                    else:
                        _logger.debug("%s successfully assigned to %s" % (self.token, newSiteID))
        _logger.debug("%s end" % self.token)
    except:
        errType, errValue, errTraceBack = sys.exc_info()
        _logger.error("%s run() : %s %s" % (self.token, errType, errValue))
from brokerage.SiteMapper import SiteMapper # password from config import panda_config passwd = panda_config.dbpasswd # logger _logger = PandaLogger().getLogger('shareMgr') _logger.debug("================= start ==================") # instantiate TB taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1) # instantiate sitemapper siteMapper = SiteMapper(taskBuffer) # number of jobs to be activated per queue nJobsPerQueue = 50 # priority threshold prioCutoff = 950 # get high prio jobs without throttling sql = "SELECT distinct computingSite FROM ATLAS_PANDA.jobsActive4 " sql += "WHERE jobStatus=:s1 AND prodSourceLabel IN (:p1) AND lockedBy=:lockedBy " sql += "AND currentPriority>=:prioCutoff " varMap = {} varMap[':s1'] = 'throttled' varMap[':p1'] = 'managed' varMap[':lockedBy'] = 'jedi'
# password from config import panda_config passwd = panda_config.dbpasswd # logger _logger = PandaLogger().getLogger('prioryMassage') tmpLog = LogWrapper(_logger) tmpLog.debug("================= start ==================") # instantiate TB taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1) # instantiate sitemapper siteMapper = SiteMapper(taskBuffer) # get usage breakdown usageBreakDownPerUser = {} usageBreakDownPerSite = {} workingGroupList = [] for table in ['ATLAS_PANDA.jobsActive4','ATLAS_PANDA.jobsArchived4']: varMap = {} varMap[':prodSourceLabel'] = 'user' varMap[':pmerge'] = 'pmerge' if table == 'ATLAS_PANDA.jobsActive4': sql = "SELECT COUNT(*),prodUserName,jobStatus,workingGroup,computingSite FROM %s WHERE prodSourceLabel=:prodSourceLabel AND processingType<>:pmerge GROUP BY prodUserName,jobStatus,workingGroup,computingSite" % table else: # with time range for archived table varMap[':modificationTime'] = datetime.datetime.utcnow() - datetime.timedelta(minutes=60) sql = "SELECT COUNT(*),prodUserName,jobStatus,workingGroup,computingSite FROM %s WHERE prodSourceLabel=:prodSourceLabel AND processingType<>:pmerge AND modificationTime>:modificationTime GROUP BY prodUserName,jobStatus,workingGroup,computingSite" % table
def run(self):
    """Finalize transfer status for a dataset callback or for a single job.

    Newer variant of the Finisher logic using setokens_output/ddm_output site
    attributes. In callback mode (self.job is None) it updates the dataset's
    transfer-status bitmap and, once all required tokens are complete, flags
    the dataset for T2 cleanup; in direct mode it operates on self.job only.
    Jobs in 'transferring' whose output/log files are all terminal get a
    POOLFILECATALOG XML snapshot written to panda_config.logdir.
    """
    # start
    try:
        byCallback = False
        if self.job == None:
            byCallback = True
            _logger.debug("start: %s" % self.dataset.name)
            _logger.debug("callback from %s" % self.site)
            # FIXME when callback from BNLPANDA disappeared
            if self.site == 'BNLPANDA':
                self.site = 'BNL-OSG2_ATLASMCDISK'
            # instantiate site mapper
            siteMapper = SiteMapper(self.taskBuffer)
            # get computingSite/destinationSE
            computingSite, destinationSE = self.taskBuffer.getDestSE(self.dataset.name)
            if destinationSE == None:
                # try to get computingSite/destinationSE from ARCH to delete sub
                # even if no active jobs left
                computingSite, destinationSE = self.taskBuffer.getDestSE(self.dataset.name, True)
                if destinationSE == None:
                    _logger.error("cannot get source/destination for %s" % self.dataset.name)
                    _logger.debug("end: %s" % self.dataset.name)
                    return
            _logger.debug("src: %s" % computingSite)
            _logger.debug("dst: %s" % destinationSE)
            # get corresponding token: match the callback site against the
            # destination site's output space-token -> DDM-endpoint map
            tmpSrcSiteSpec = siteMapper.getSite(computingSite)
            tmpDstSiteSpec = siteMapper.getSite(destinationSE)
            _logger.debug(tmpDstSiteSpec.setokens_output)
            destToken = None
            for tmpToken, tmpDdmId in tmpDstSiteSpec.setokens_output.iteritems():
                if self.site == tmpDdmId:
                    destToken = tmpToken
                    break
            _logger.debug("use Token=%s" % destToken)
            # get required tokens (comma-separated list)
            reqTokens = self.taskBuffer.getDestTokens(self.dataset.name)
            if reqTokens == None:
                _logger.error("cannot get required token for %s" % self.dataset.name)
                _logger.debug("end: %s" % self.dataset.name)
                return
            _logger.debug("req Token=%s" % reqTokens)
            # make bitmap for the token: bit position = index of destToken in
            # the required-token list
            bitMap = 1
            if len(reqTokens.split(',')) > 1:
                for tmpReqToken in reqTokens.split(','):
                    if tmpReqToken == destToken:
                        break
                    # shift one bit
                    bitMap <<= 1
            # completed bitmap: all required-token bits set
            compBitMap = (1 << len(reqTokens.split(','))) - 1
            # ignore the lowest bit for T1, file on DISK is already there
            if tmpSrcSiteSpec.ddm_output == tmpDstSiteSpec.ddm_output:
                compBitMap = compBitMap & 0xFFFE
            # update bitmap in DB
            updatedBitMap = self.taskBuffer.updateTransferStatus(self.dataset.name, bitMap)
            _logger.debug("transfer status:%s - comp:%s - bit:%s" % (hex(updatedBitMap),
                                                                     hex(compBitMap), hex(bitMap)))
            # update output files only when every required token has reported
            if (updatedBitMap & compBitMap) == compBitMap:
                ids = self.taskBuffer.updateOutFilesReturnPandaIDs(self.dataset.name)
                # set flag for T2 cleanup
                self.dataset.status = 'cleanup'
                self.taskBuffer.updateDatasets([self.dataset])
            else:
                _logger.debug("end: %s" % self.dataset.name)
                return
        else:
            _logger.debug("start: %s" % self.job.PandaID)
            # update input files
            ids = [self.job.PandaID]
        _logger.debug("IDs: %s" % ids)
        if len(ids) != 0:
            # get job objects (callback mode fetches them by PandaID)
            if self.job == None:
                jobs = self.taskBuffer.peekJobs(ids, fromDefined=False,
                                                fromArchived=False, fromWaiting=False)
            else:
                jobs = [self.job]
            # loop over all jobs
            for job in jobs:
                if job == None:
                    continue
                _logger.debug("Job: %s" % job.PandaID)
                if job.jobStatus == 'transferring':
                    jobReady = True
                    failedFiles = []
                    noOutFiles = []
                    # check file status: any output/log file not in a terminal
                    # state keeps the job waiting
                    for file in job.Files:
                        if file.type == 'output' or file.type == 'log':
                            if file.status == 'failed':
                                failedFiles.append(file.lfn)
                            elif file.status == 'nooutput':
                                noOutFiles.append(file.lfn)
                            elif file.status != 'ready':
                                _logger.debug("Job: %s file:%s %s != ready" % (job.PandaID,
                                                                               file.lfn, file.status))
                                jobReady = False
                                break
                    # finish job
                    if jobReady:
                        if byCallback:
                            _logger.debug("Job: %s all files ready" % job.PandaID)
                        else:
                            _logger.debug("Job: %s all files checked with catalog" % job.PandaID)
                        # create POOLFILECATALOG XML listing the good files
                        try:
                            import xml.dom.minidom
                            dom = xml.dom.minidom.getDOMImplementation()
                            doc = dom.createDocument(None, 'xml', None)
                            topNode = doc.createElement("POOLFILECATALOG")
                            for file in job.Files:
                                if file.type in ['output', 'log']:
                                    # skip failed or no-output files
                                    if file.lfn in failedFiles + noOutFiles:
                                        continue
                                    # File
                                    fileNode = doc.createElement("File")
                                    fileNode.setAttribute("ID", file.GUID)
                                    # LFN
                                    logNode = doc.createElement("logical")
                                    lfnNode = doc.createElement("lfn")
                                    lfnNode.setAttribute('name', file.lfn)
                                    # metadata
                                    fsizeNode = doc.createElement("metadata")
                                    fsizeNode.setAttribute("att_name", "fsize")
                                    fsizeNode.setAttribute("att_value", str(file.fsize))
                                    # checksum: 'ad:' prefix means adler32, else md5
                                    if file.checksum.startswith('ad:'):
                                        # adler32
                                        chksumNode = doc.createElement("metadata")
                                        chksumNode.setAttribute("att_name", "adler32")
                                        chksumNode.setAttribute("att_value",
                                                                re.sub('^ad:', '', file.checksum))
                                    else:
                                        # md5sum
                                        chksumNode = doc.createElement("metadata")
                                        chksumNode.setAttribute("att_name", "md5sum")
                                        chksumNode.setAttribute("att_value",
                                                                re.sub('^md5:', '', file.checksum))
                                    # append nodes
                                    logNode.appendChild(lfnNode)
                                    fileNode.appendChild(logNode)
                                    fileNode.appendChild(fsizeNode)
                                    fileNode.appendChild(chksumNode)
                                    topNode.appendChild(fileNode)
                            # status in file name signals overall outcome
                            if failedFiles == []:
                                statusFileName = 'finished'
                            else:
                                statusFileName = 'failed'
                            # write to file with a unique suffix
                            xmlFile = '%s/%s_%s_%s' % (panda_config.logdir, job.PandaID,
                                                       statusFileName,
                                                       commands.getoutput('uuidgen'))
                            oXML = open(xmlFile, "w")
                            oXML.write(topNode.toxml())
                            oXML.close()
                        except:
                            type, value, traceBack = sys.exc_info()
                            _logger.error("Job: %s %s %s" % (job.PandaID, type, value))
                _logger.debug("Job: %s status: %s" % (job.PandaID, job.jobStatus))
        # end
        if self.job == None:
            _logger.debug("end: %s" % self.dataset.name)
        else:
            _logger.debug("end: %s" % self.job.PandaID)
    except:
        type, value, traceBack = sys.exc_info()
        _logger.error("run() : %s %s" % (type, value))
import re
import sys
import urllib2, urllib
from dq2.info import TiersOfATLAS
import userinterface.Client as Client
from userinterface.Client import baseURLSSL
from taskbuffer.TaskBuffer import taskBuffer
from brokerage.SiteMapper import SiteMapper
from config import panda_config

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

import httplib
import commands

# look up one job by the PandaID given on the command line
id = sys.argv[1]
s, o = Client.getJobStatus([id])
if s != 0:
    # non-zero return code from the client call
    print "failed to get job with:%s" % s
    sys.exit(0)
job = o[0]
if job == None:
    print "got None"
# password from config import panda_config passwd = panda_config.dbpasswd # logger _logger = PandaLogger().getLogger('esPreemption') tmpLog = LogWrapper(_logger) tmpLog.debug("================= start ==================") # instantiate TB taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1) # instantiate sitemapper siteMapper = SiteMapper(taskBuffer) # time limit timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=15) # get low priority ES jobs per site sqlEsJobs = "SELECT PandaID,computingSite,commandToPilot,startTime " sqlEsJobs += "FROM {0}.jobsActive4 ".format(panda_config.schemaPANDA) sqlEsJobs += "WHERE prodSourceLabel IN (:label1,:label2) AND eventService=:es " sqlEsJobs += "AND currentPriority<:prio AND jobStatus=:jobStat " sqlEsJobs += "ORDER BY currentPriority,PandaID " varMap = {} varMap[':label1'] = 'managed' varMap[':label2'] = 'test' varMap[':es'] = 1
def run(self):
    """Finalize the destination datasets of one job.

    For each destination dataset of self.job: pick the terminal dataset
    status ('closed', 'tobemerged', 'tobeclosed' or 'doing'), write it to
    the DB, propagate the status to the top-level user dataset and (for
    usermerge jobs) to the parent dataset, optionally build PandaDDM
    transfer jobs, and trigger the Activator for non-sub datasets.
    Afterwards it finalizes pending user jobs, updates unmerged JEDI
    datasets, and may start the email Notifier.

    Any exception is caught at the very bottom and only logged
    (legacy best-effort behavior).
    """
    try:
        _logger.debug('%s Start %s' % (self.pandaID,self.job.jobStatus))
        flagComplete = True        # stays True only if every dataset was handled cleanly
        ddmJobs = []               # PandaDDM transfer jobs, submitted after the loop
        topUserDsList = []         # top-level/parent datasets already updated (avoid repetition)
        usingMerger = False        # set once any dataset is routed to the Merger
        disableNotifier = False    # suppress email notification when merging is in play
        firstIndvDS = True         # only the first individual DS gets 'tobemerged'
        finalStatusDS = []         # datasets whose terminal status was actually written
        for destinationDBlock in self.destinationDBlocks:
            dsList = []
            _logger.debug('%s start %s' % (self.pandaID,destinationDBlock))
            # ignore tid datasets
            if re.search('_tid[\d_]+$',destinationDBlock):
                _logger.debug('%s skip %s' % (self.pandaID,destinationDBlock))
                continue
            # ignore HC datasets (but still process their _sub / .lib datasets)
            if re.search('^hc_test\.',destinationDBlock) != None or re.search('^user\.gangarbt\.',destinationDBlock) != None:
                if re.search('_sub\d+$',destinationDBlock) == None and re.search('\.lib$',destinationDBlock) == None:
                    _logger.debug('%s skip HC %s' % (self.pandaID,destinationDBlock))
                    continue
            # query dataset, preferring the pre-fetched cache in self.datasetMap
            if self.datasetMap.has_key(destinationDBlock):
                dataset = self.datasetMap[destinationDBlock]
            else:
                dataset = self.taskBuffer.queryDatasetWithMap({'name':destinationDBlock})
            if dataset == None:
                _logger.error('%s Not found : %s' % (self.pandaID,destinationDBlock))
                flagComplete = False
                continue
            # skip tobedeleted/tobeclosed
            if dataset.status in ['cleanup','tobeclosed','completed']:
                _logger.debug('%s skip %s due to %s' % (self.pandaID,destinationDBlock,dataset.status))
                continue
            dsList.append(dataset)
            # sort
            # NOTE(review): dsList holds exactly one dataset at this point,
            # so the sort is effectively a no-op -- kept for fidelity
            dsList.sort()
            # count number of completed files; 'unknown' status means not yet done
            notFinish = self.taskBuffer.countFilesWithMap({'destinationDBlock':destinationDBlock,
                                                           'status':'unknown'})
            if notFinish < 0:
                _logger.error('%s Invalid DB return : %s' % (self.pandaID,notFinish))
                flagComplete = False
                continue
            # check if completed
            _logger.debug('%s notFinish:%s' % (self.pandaID,notFinish))
            # decide the terminal status for this dataset
            if self.job.destinationSE == 'local' and self.job.prodSourceLabel in ['user','panda']:
                # close non-DQ2 destinationDBlock immediately
                finalStatus = 'closed'
            elif self.job.lockedby == 'jedi' and self.isTopLevelDS(destinationDBlock):
                # set it closed in order not to trigger DDM cleanup. It will be closed by JEDI
                finalStatus = 'closed'
            elif self.job.prodSourceLabel in ['user'] and "--mergeOutput" in self.job.jobParameters \
                    and self.job.processingType != 'usermerge':
                # merge output files
                if firstIndvDS:
                    # set 'tobemerged' to only the first dataset to avoid triggering many Mergers for --individualOutDS
                    finalStatus = 'tobemerged'
                    firstIndvDS = False
                else:
                    finalStatus = 'tobeclosed'
                # set merging to top dataset
                usingMerger = True
                # disable Notifier
                disableNotifier = True
            elif self.job.produceUnMerge():
                finalStatus = 'doing'
            else:
                # set status to 'tobeclosed' to trigger DQ2 closing
                finalStatus = 'tobeclosed'
            if notFinish==0:
                # all files accounted for -> freeze the dataset
                _logger.debug('%s set %s to dataset : %s' % (self.pandaID,finalStatus,destinationDBlock))
                # set status
                dataset.status = finalStatus
                # update dataset in DB; the criteria make the update a no-op if
                # another thread already finalized or locked the dataset
                retT = self.taskBuffer.updateDatasets(dsList,withLock=True,withCriteria="status<>:crStatus AND status<>:lockStatus ",
                                                      criteriaMap={':crStatus':finalStatus,':lockStatus':'locked'})
                if len(retT) > 0 and retT[0]==1:
                    # this thread won the update
                    finalStatusDS += dsList
                    # close user datasets
                    if self.job.prodSourceLabel in ['user'] and self.job.destinationDBlock.endswith('/') \
                            and (dataset.name.startswith('user') or dataset.name.startswith('group')):
                        # get top-level user dataset (strip the _subNNN suffix)
                        topUserDsName = re.sub('_sub\d+$','',dataset.name)
                        # update if it is the first attempt
                        if topUserDsName != dataset.name and not topUserDsName in topUserDsList and self.job.lockedby != 'jedi':
                            topUserDs = self.taskBuffer.queryDatasetWithMap({'name':topUserDsName})
                            if topUserDs != None:
                                # check status
                                if topUserDs.status in ['completed','cleanup','tobeclosed',
                                                        'tobemerged','merging']:
                                    _logger.debug('%s skip %s due to status=%s' % (self.pandaID,topUserDsName,topUserDs.status))
                                else:
                                    # set status
                                    if self.job.processingType.startswith('gangarobot') or \
                                            self.job.processingType.startswith('hammercloud'):
                                        # not trigger freezing for HC datasets so that files can be appended
                                        topUserDs.status = 'completed'
                                    elif not usingMerger:
                                        topUserDs.status = finalStatus
                                    else:
                                        topUserDs.status = 'merging'
                                    # append to avoid repetition
                                    topUserDsList.append(topUserDsName)
                                    # update DB
                                    retTopT = self.taskBuffer.updateDatasets([topUserDs],withLock=True,withCriteria="status<>:crStatus",
                                                                             criteriaMap={':crStatus':topUserDs.status})
                                    if len(retTopT) > 0 and retTopT[0]==1:
                                        _logger.debug('%s set %s to top dataset : %s' % (self.pandaID,topUserDs.status,topUserDsName))
                                    else:
                                        _logger.debug('%s failed to update top dataset : %s' % (self.pandaID,topUserDsName))
                        # get parent dataset for merge job
                        if self.job.processingType == 'usermerge':
                            # the parent dataset name is embedded in the job parameters
                            tmpMatch = re.search('--parentDS ([^ \'\"]+)',self.job.jobParameters)
                            if tmpMatch == None:
                                _logger.error('%s failed to extract parentDS' % self.pandaID)
                            else:
                                unmergedDsName = tmpMatch.group(1)
                                # update if it is the first attempt
                                if not unmergedDsName in topUserDsList:
                                    unmergedDs = self.taskBuffer.queryDatasetWithMap({'name':unmergedDsName})
                                    if unmergedDs == None:
                                        _logger.error('%s failed to get parentDS=%s from DB' % (self.pandaID,unmergedDsName))
                                    else:
                                        # check status
                                        if unmergedDs.status in ['completed','cleanup','tobeclosed']:
                                            _logger.debug('%s skip %s due to status=%s' % (self.pandaID,unmergedDsName,unmergedDs.status))
                                        else:
                                            # set status
                                            unmergedDs.status = finalStatus
                                            # append to avoid repetition
                                            topUserDsList.append(unmergedDsName)
                                            # update DB
                                            retTopT = self.taskBuffer.updateDatasets([unmergedDs],withLock=True,withCriteria="status<>:crStatus",
                                                                                     criteriaMap={':crStatus':unmergedDs.status})
                                            if len(retTopT) > 0 and retTopT[0]==1:
                                                _logger.debug('%s set %s to parent dataset : %s' % (self.pandaID,unmergedDs.status,unmergedDsName))
                                            else:
                                                _logger.debug('%s failed to update parent dataset : %s' % (self.pandaID,unmergedDsName))
                    # legacy PandaDDM path: build a transfer job (or fire the
                    # callback directly when src == dst) for managed production
                    if self.pandaDDM and self.job.prodSourceLabel=='managed':
                        # instantiate SiteMapper
                        if self.siteMapper == None:
                            self.siteMapper = SiteMapper(self.taskBuffer)
                        # get file list for PandaDDM
                        retList = self.taskBuffer.queryFilesWithMap({'destinationDBlock':destinationDBlock})
                        lfnsStr = ''
                        guidStr = ''
                        for tmpFile in retList:
                            if tmpFile.type in ['log','output']:
                                lfnsStr += '%s,' % tmpFile.lfn
                                guidStr += '%s,' % tmpFile.GUID
                        if lfnsStr != '':
                            # drop trailing commas
                            guidStr = guidStr[:-1]
                            lfnsStr = lfnsStr[:-1]
                            # create a DDM job
                            ddmjob = JobSpec()
                            ddmjob.jobDefinitionID = int(time.time()) % 10000
                            ddmjob.jobName = "%s" % commands.getoutput('uuidgen')
                            ddmjob.transformation = 'http://pandaserver.cern.ch:25080/trf/mover/run_dq2_cr'
                            ddmjob.destinationDBlock = 'testpanda.%s' % ddmjob.jobName
                            ddmjob.computingSite = "BNL_ATLAS_DDM"
                            ddmjob.destinationSE = ddmjob.computingSite
                            ddmjob.currentPriority = 200000
                            ddmjob.prodSourceLabel = 'ddm'
                            ddmjob.transferType = 'sub'
                            # append log file
                            fileOL = FileSpec()
                            fileOL.lfn = "%s.job.log.tgz" % ddmjob.jobName
                            fileOL.destinationDBlock = ddmjob.destinationDBlock
                            fileOL.destinationSE = ddmjob.destinationSE
                            fileOL.dataset = ddmjob.destinationDBlock
                            fileOL.type = 'log'
                            ddmjob.addFile(fileOL)
                            # make arguments
                            dstDQ2ID = 'BNLPANDA'
                            srcDQ2ID = self.siteMapper.getSite(self.job.computingSite).ddm
                            callBackURL = 'https://%s:%s/server/panda/datasetCompleted?vuid=%s&site=%s' % \
                                          (panda_config.pserverhost,panda_config.pserverport,
                                           dataset.vuid,dstDQ2ID)
                            _logger.debug(callBackURL)
                            # set src/dest
                            ddmjob.sourceSite = srcDQ2ID
                            ddmjob.destinationSite = dstDQ2ID
                            # if src==dst, send callback without ddm job
                            if dstDQ2ID == srcDQ2ID:
                                comout = commands.getoutput('curl -k %s' % callBackURL)
                                _logger.debug(comout)
                            else:
                                # run dq2_cr
                                callBackURL = urllib.quote(callBackURL)
                                # get destination dir
                                destDir = brokerage.broker_util._getDefaultStorage(self.siteMapper.getSite(self.job.computingSite).dq2url)
                                argStr = "-s %s -r %s --guids %s --lfns %s --callBack %s -d %s/%s %s" % \
                                         (srcDQ2ID,dstDQ2ID,guidStr,lfnsStr,callBackURL,destDir,
                                          destinationDBlock,destinationDBlock)
                                # set job parameters
                                ddmjob.jobParameters = argStr
                                _logger.debug('%s pdq2_cr %s' % (self.pandaID,ddmjob.jobParameters))
                                ddmJobs.append(ddmjob)
                    # start Activator for non-sub datasets
                    if re.search('_sub\d+$',dataset.name) == None:
                        if self.job.prodSourceLabel=='panda' and self.job.processingType in ['merge','unmerge']:
                            # don't trigger Activator for merge jobs
                            pass
                        else:
                            if self.job.jobStatus == 'finished':
                                aThr = Activator(self.taskBuffer,dataset)
                                aThr.start()
                                aThr.join()
                else:
                    # unset flag since another thread already updated
                    #flagComplete = False
                    pass
            else:
                # some files are still pending: update dataset in DB without freezing
                self.taskBuffer.updateDatasets(dsList,withLock=True,withCriteria="status<>:crStatus AND status<>:lockStatus ",
                                               criteriaMap={':crStatus':finalStatus,':lockStatus':'locked'})
                # unset flag
                flagComplete = False
            # end
            _logger.debug('%s end %s' % (self.pandaID,destinationDBlock))
        # start DDM jobs
        if ddmJobs != []:
            self.taskBuffer.storeJobs(ddmJobs,self.job.prodUserID,joinThr=True)
        # change pending jobs to failed
        finalizedFlag = True
        if flagComplete and self.job.prodSourceLabel=='user':
            _logger.debug('%s finalize %s %s' % (self.pandaID,self.job.prodUserName,self.job.jobDefinitionID))
            finalizedFlag = self.taskBuffer.finalizePendingJobs(self.job.prodUserName,self.job.jobDefinitionID,waitLock=True)
            _logger.debug('%s finalized with %s' % (self.pandaID,finalizedFlag))
        # update unmerged datasets in JEDI to trigger merging
        if flagComplete and self.job.produceUnMerge() and finalStatusDS != []:
            if finalizedFlag:
                self.taskBuffer.updateUnmergedDatasets(self.job,finalStatusDS)
        # start notifier
        _logger.debug('%s source:%s complete:%s' % (self.pandaID,self.job.prodSourceLabel,flagComplete))
        if (self.job.jobStatus != 'transferring') and ((flagComplete and self.job.prodSourceLabel=='user') or \
           (self.job.jobStatus=='failed' and self.job.prodSourceLabel=='panda')) and \
           self.job.lockedby != 'jedi':
            # don't send email for merge jobs
            if (not disableNotifier) and not self.job.processingType in ['merge','unmerge']:
                useNotifier = True
                summaryInfo = {}
                # check all jobDefIDs in jobsetID
                if not self.job.jobsetID in [0,None,'NULL']:
                    useNotifier,summaryInfo = self.taskBuffer.checkDatasetStatusForNotifier(self.job.jobsetID,self.job.jobDefinitionID,
                                                                                            self.job.prodUserName)
                    _logger.debug('%s useNotifier:%s' % (self.pandaID,useNotifier))
                if useNotifier:
                    _logger.debug('%s start Notifier' % self.pandaID)
                    nThr = Notifier.Notifier(self.taskBuffer,self.job,self.destinationDBlocks,summaryInfo)
                    nThr.run()
                    _logger.debug('%s end Notifier' % self.pandaID)
        _logger.debug('%s End' % self.pandaID)
    except:
        # NOTE(review): bare except is intentional legacy best-effort handling;
        # errors are only logged so the closer thread never propagates
        errType,errValue = sys.exc_info()[:2]
        _logger.error("%s %s" % (errType,errValue))
import re import sys import urllib2,urllib from dq2.info import TiersOfATLAS import userinterface.Client as Client from userinterface.Client import baseURLSSL from taskbuffer.TaskBuffer import taskBuffer from brokerage.SiteMapper import SiteMapper from config import panda_config # instantiate TB taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1) # instantiate sitemapper siteMapper = SiteMapper(taskBuffer) import httplib import commands id = sys.argv[1] s,o = Client.getJobStatus([id]) if s != 0: print "failed to get job with:%s" % s sys.exit(0) job = o[0] if job == None:
startTime = datetime.datetime(*time.strptime(timeM.group(1),'%b %d %H:%M:%S %Y')[:6]) # kill old process if startTime < timeLimit: _logger.debug("old process : %s %s" % (pid,startTime)) _logger.debug(line) commands.getoutput('kill -9 %s' % pid) except: type, value, traceBack = sys.exc_info() _logger.error("kill process : %s %s" % (type,value)) # instantiate TB taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1) # instantiate sitemapper siteMapper = SiteMapper(taskBuffer) _memoryCheck("rebroker") # rebrokerage _logger.debug("Rebrokerage start") try: normalTimeLimit = datetime.datetime.utcnow() - datetime.timedelta(hours=24) sortTimeLimit = datetime.datetime.utcnow() - datetime.timedelta(hours=3) sql = "SELECT jobDefinitionID,prodUserName,prodUserID,computingSite,MAX(modificationTime),jediTaskID,processingType " sql += "FROM ATLAS_PANDA.jobsActive4 " sql += "WHERE prodSourceLabel IN (:prodSourceLabel1,:prodSourceLabel2) AND jobStatus IN (:jobStatus1,:jobStatus2) " sql += "AND modificationTime<:modificationTime " sql += "AND jobsetID IS NOT NULL " sql += "AND lockedBy=:lockedBy " sql += "GROUP BY jobDefinitionID,prodUserName,prodUserID,computingSite,jediTaskID,processingType "
startTime = datetime.datetime( *time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6]) # kill old process if startTime < timeLimit: tmpLog.debug("old process : %s %s" % (pid, startTime)) tmpLog.debug(line) commands.getoutput('kill -9 %s' % pid) except: type, value, traceBack = sys.exc_info() tmpLog.error("kill process : %s %s" % (type, value)) # instantiate TB taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1) # instantiate sitemapper aSiteMapper = SiteMapper(taskBuffer) # delete tmpLog.debug("Del session") status, retSel = taskBuffer.querySQLS( "SELECT MAX(PandaID) FROM ATLAS_PANDA.jobsDefined4", {}) if retSel != None: try: maxID = retSel[0][0] tmpLog.debug("maxID : %s" % maxID) if maxID != None: varMap = {} varMap[':maxID'] = maxID varMap[':jobStatus1'] = 'activated' varMap[':jobStatus2'] = 'waiting' varMap[':jobStatus3'] = 'failed'
# password from config import panda_config passwd = panda_config.dbpasswd # logger _logger = PandaLogger().getLogger('esPreemption') tmpLog = LogWrapper(_logger) tmpLog.debug("================= start ==================") # instantiate TB taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1) # instantiate sitemapper siteMapper = SiteMapper(taskBuffer) # time limit timeLimit = datetime.datetime.utcnow()-datetime.timedelta(minutes=15) # get low priority ES jobs per site sqlEsJobs = "SELECT PandaID,computingSite,commandToPilot,startTime " sqlEsJobs += "FROM {0}.jobsActive4 ".format(panda_config.schemaPANDA) sqlEsJobs += "WHERE prodSourceLabel IN (:label1,:label2) AND eventService=:es " sqlEsJobs += "AND currentPriority<:prio AND jobStatus=:jobStat " sqlEsJobs += "ORDER BY currentPriority,PandaID " varMap = {} varMap[':label1'] = 'managed' varMap[':label2'] = 'test' varMap[':es'] = 1