def run(self):
    """Erase 'dis' (dispatch) datasets assigned to this worker.

    For each (vuid, name, modDate) in self.datasets: skip anything whose
    name does not end in _dis<digits>, ask rucio to erase the dataset, and
    on success mark the row in ATLAS_PANDA.Datasets as 'deleted'.
    All errors are logged; the worker always deregisters itself from the
    pool and releases its slot lock on exit.
    """
    self.lock.acquire()
    try:
        # loop over all datasets
        for vuid, name, modDate in self.datasets:
            # only dis datasets (raw string so \d is a regex class, not an escape)
            if re.search(r'_dis\d+$', name) is None:
                _logger.error("Eraser : non disDS %s" % name)
                continue
            # delete
            _logger.debug("Eraser %s dis %s %s" % (self.operationType, modDate, name))
            # delete or shorten
            endStatus = 'deleted'
            status, out = rucioAPI.eraseDataset(name)
            if not status:
                _logger.error(out)
                continue
            _logger.debug('OK with %s' % name)
            # update DB status; release the shared proxy lock even if the
            # query raises, so other workers are not deadlocked
            self.proxyLock.acquire()
            try:
                varMap = {}
                varMap[':vuid'] = vuid
                varMap[':status'] = endStatus
                taskBuffer.querySQLS("UPDATE ATLAS_PANDA.Datasets SET status=:status,modificationdate=CURRENT_DATE WHERE vuid=:vuid", varMap)
            finally:
                self.proxyLock.release()
    except Exception:
        errStr = traceback.format_exc()
        _logger.error(errStr)
    finally:
        # always deregister from the pool and free the worker slot
        self.pool.remove(self)
        self.lock.release()
def run(self):
    """Close datasets assigned to this worker.

    For each (vuid, name, modDate): close the dataset in rucio (names with
    pandaddm_/user./group./hc_test./panda.um. prefixes are treated as not
    existing in rucio and skipped), flip the DB row from 'tobeclosed' to
    'completed', mark associated dis datasets to-be-deleted, and erase the
    dataset if it turned out to contain zero files.
    """
    self.lock.acquire()
    try:
        # loop over all datasets
        for vuid, name, modDate in self.datasets:
            _logger.debug("Close %s %s" % (modDate, name))
            dsExists = True
            # these name prefixes are not registered in rucio
            if name.startswith('pandaddm_') or name.startswith('user.') or name.startswith('group.') \
                    or name.startswith('hc_test.') or name.startswith('panda.um.'):
                dsExists = False
            if dsExists:
                # check if dataset exists
                status, out = rucioAPI.getMetaData(name)
                if status == True:
                    if out is not None:
                        try:
                            rucioAPI.closeDataset(name)
                            status = True
                        except Exception:
                            errtype, errvalue = sys.exc_info()[:2]
                            out = 'failed to freeze : {0} {1}'.format(errtype, errvalue)
                            status = False
                    else:
                        # dataset not exist
                        status, out = True, ''
                        dsExists = False
            else:
                status, out = True, ''
            if not status:
                _logger.error('{0} failed to close with {1}'.format(name, out))
            else:
                # mark completed; guard the shared proxy lock with finally so
                # a DB error cannot leave it held
                self.proxyLock.acquire()
                try:
                    varMap = {}
                    varMap[':vuid'] = vuid
                    varMap[':newstatus'] = 'completed'
                    varMap[':oldstatus'] = 'tobeclosed'
                    taskBuffer.querySQLS("UPDATE ATLAS_PANDA.Datasets SET status=:newstatus,modificationdate=CURRENT_DATE WHERE vuid=:vuid AND status=:oldstatus", varMap)
                finally:
                    self.proxyLock.release()
                # set tobedeleted to dis
                setTobeDeletedToDis(name)
                # skip if dataset is not real
                if not dsExists:
                    continue
                # count # of files
                status, out = rucioAPI.getNumberOfFiles(name)
                if status is not True:
                    if status is False:
                        _logger.error(out)
                else:
                    _logger.debug(out)
                    # best-effort: out may not be an int, and erase may fail;
                    # an empty dataset is simply left for a later cycle then
                    try:
                        nFile = int(out)
                        if nFile == 0:
                            # erase dataset
                            _logger.debug('erase %s' % name)
                            status, out = rucioAPI.eraseDataset(name)
                            _logger.debug('OK with %s' % name)
                    except Exception:
                        pass
    except Exception:
        # was a silent pass; log the traceback like the sibling workers do
        errStr = traceback.format_exc()
        _logger.error(errStr)
    finally:
        # always deregister from the pool and free the worker slot
        self.pool.remove(self)
        self.lock.release()
def run(self):
    """Freeze datasets assigned to this worker.

    A dataset is frozen only when every file in filesTable4 destined for it
    is in a terminal-ish state. panda.um.* datasets with leftover merging /
    failed files are handed to a JEDI Closer thread instead. On success the
    DB row is set to 'completed' and empty datasets are erased; otherwise the
    row's modification date is refreshed so the dataset is revisited later.
    """
    self.lock.acquire()
    try:
        for vuid, name, modDate in self.datasets:
            _logger.debug("Freezer start %s %s" % (modDate, name))
            # fetch status of every file bound for this destination dataset
            self.proxyLock.acquire()
            retF, resF = taskBuffer.querySQLS("SELECT /*+ index(tab FILESTABLE4_DESTDBLOCK_IDX) */ PandaID,status FROM ATLAS_PANDA.filesTable4 tab WHERE destinationDBlock=:destinationDBlock ", {':destinationDBlock': name})
            self.proxyLock.release()
            if retF < 0:
                _logger.error("SQL error")
            else:
                # all files must be in a terminal-ish state to freeze
                allFinished = True
                onePandaID = None
                for tmpPandaID, tmpFileStatus in resF:
                    onePandaID = tmpPandaID
                    if not tmpFileStatus in ['ready', 'failed', 'skipped', 'merging', 'finished']:
                        allFinished = False
                        break
                # check sub datasets in the jobset for event service job
                if allFinished:
                    self.proxyLock.acquire()
                    tmpJobs = taskBuffer.getFullJobStatus([onePandaID])
                    self.proxyLock.release()
                    if len(tmpJobs) > 0 and tmpJobs[0] is not None:
                        if EventServiceUtils.isEventServiceMerge(tmpJobs[0]):
                            self.proxyLock.acquire()
                            cThr = Closer(taskBuffer, [], tmpJobs[0])
                            allFinished = cThr.checkSubDatasetsInJobset()
                            self.proxyLock.release()
                            _logger.debug("closer checked sub datasets in the jobset for %s : %s" % (name, allFinished))
                # no files in filesTable
                if allFinished:
                    _logger.debug("freeze %s " % name)
                    # these name prefixes are not registered in rucio
                    dsExists = True
                    if name.startswith('pandaddm_') or name.startswith('user.') or name.startswith('group.') \
                            or name.startswith('hc_test.') or name.startswith('panda.um.'):
                        dsExists = False
                    if name.startswith('panda.um.'):
                        # user-merge dataset: look for leftover merging/failed files
                        self.proxyLock.acquire()
                        retMer, resMer = taskBuffer.querySQLS("SELECT /*+ index(tab FILESTABLE4_DESTDBLOCK_IDX) */ PandaID FROM ATLAS_PANDA.filesTable4 tab WHERE destinationDBlock=:destinationDBlock AND status IN (:statusM,:statusF) ", {':destinationDBlock': name, ':statusM': 'merging', ':statusF': 'failed'})
                        self.proxyLock.release()
                        if resMer is not None and len(resMer) > 0:
                            mergeID = resMer[0][0]
                            # get merging jobs
                            self.proxyLock.acquire()
                            mergingJobs = taskBuffer.peekJobs([mergeID], fromDefined=False, fromArchived=False, fromWaiting=False)
                            self.proxyLock.release()
                            mergeJob = mergingJobs[0]
                            if mergeJob is not None:
                                tmpDestDBlocks = []
                                # get destDBlock
                                for tmpFile in mergeJob.Files:
                                    if tmpFile.type in ['output', 'log']:
                                        if not tmpFile.destinationDBlock in tmpDestDBlocks:
                                            tmpDestDBlocks.append(tmpFile.destinationDBlock)
                                # run the JEDI closer synchronously for the merge job's outputs
                                _logger.debug("start JEDI closer for %s " % name)
                                self.proxyLock.acquire()
                                cThr = Closer(taskBuffer, tmpDestDBlocks, mergeJob)
                                cThr.start()
                                cThr.join()
                                self.proxyLock.release()
                                _logger.debug("end JEDI closer for %s " % name)
                                # closer handled this dataset; move to the next one
                                continue
                            else:
                                _logger.debug("failed to get merging job for %s " % name)
                        else:
                            _logger.debug("failed to get merging file for %s " % name)
                        status, out = True, ''
                    elif dsExists:
                        # check if dataset exists
                        status, out = rucioAPI.getMetaData(name)
                        if status == True:
                            if out is not None:
                                try:
                                    rucioAPI.closeDataset(name)
                                    status = True
                                except Exception:
                                    errtype, errvalue = sys.exc_info()[:2]
                                    out = 'failed to freeze : {0} {1}'.format(errtype, errvalue)
                                    status = False
                            else:
                                # dataset not exist
                                status, out = True, ''
                                dsExists = False
                    else:
                        status, out = True, ''
                    if not status:
                        _logger.error('{0} failed to freeze with {1}'.format(name, out))
                    else:
                        # mark the dataset completed in the DB
                        self.proxyLock.acquire()
                        varMap = {}
                        varMap[':vuid'] = vuid
                        varMap[':status'] = 'completed'
                        taskBuffer.querySQLS("UPDATE ATLAS_PANDA.Datasets SET status=:status,modificationdate=CURRENT_DATE WHERE vuid=:vuid", varMap)
                        self.proxyLock.release()
                        if name.startswith('pandaddm_') or name.startswith('panda.um.') or not dsExists:
                            continue
                        # set tobedeleted to dis
                        setTobeDeletedToDis(name)
                        # count # of files
                        status, out = rucioAPI.getNumberOfFiles(name)
                        if status is not True:
                            if status is False:
                                _logger.error(out)
                        else:
                            _logger.debug(out)
                            # best-effort erase of an empty dataset; any failure
                            # here is deliberately ignored
                            try:
                                nFile = int(out)
                                _logger.debug(nFile)
                                if nFile == 0:
                                    # erase dataset
                                    _logger.debug('erase %s' % name)
                                    status, out = rucioAPI.eraseDataset(name)
                                    _logger.debug('OK with %s' % name)
                            except Exception:
                                pass
                else:
                    # not all files finished yet: touch the row so the dataset
                    # is picked up again on a later cycle
                    _logger.debug("wait %s " % name)
                    self.proxyLock.acquire()
                    taskBuffer.querySQLS("UPDATE ATLAS_PANDA.Datasets SET modificationdate=CURRENT_DATE WHERE vuid=:vuid", {':vuid': vuid})
                    self.proxyLock.release()
            _logger.debug("end %s " % name)
    except Exception:
        errStr = traceback.format_exc()
        _logger.error(errStr)
    self.pool.remove(self)
    self.lock.release()
def run(self):
    """Delete '_sub' datasets whose producing jobs are all done.

    For each (vuid, name, modDate): verify the name ends in _sub<digits>,
    look up every PandaID that wrote to the dataset, and only when all of
    those jobs are archived in a terminal state (finished/failed/cancelled/
    closed) erase the dataset in rucio (with a grace period) and flip the
    DB row from completed/cleanup to 'deleted'. Otherwise the dataset is
    left for a later cycle.
    """
    self.lock.acquire()
    try:
        for vuid, name, modDate in self.datasets:
            # check just in case (raw string so \d is a regex class)
            if re.search(r'_sub\d+$', name) is None:
                _logger.debug("skip non sub %s" % name)
                continue
            _logger.debug("delete sub %s" % name)
            # these name prefixes are not registered in rucio
            if name.startswith('pandaddm_') or name.startswith('user.') or name.startswith('group.') \
                    or name.startswith('hc_test.') or name.startswith('panda.um.'):
                dsExists = False
            else:
                dsExists = True
            # get PandaIDs
            self.proxyLock.acquire()
            retF, resF = taskBuffer.querySQLS("SELECT /*+ index(tab FILESTABLE4_DESTDBLOCK_IDX) */ DISTINCT PandaID FROM ATLAS_PANDA.filesTable4 tab WHERE destinationDBlock=:destinationDBlock ", {':destinationDBlock': name})
            self.proxyLock.release()
            if retF is None:
                _logger.error("SQL error for sub {0}".format(name))
                continue
            else:
                _logger.debug("sub {0} has {1} jobs".format(name, len(resF)))
                # check that every producing job reached a terminal state;
                # finally guarantees the shared proxy lock is released even
                # if a query raises
                self.proxyLock.acquire()
                try:
                    sqlP = "SELECT jobStatus FROM ATLAS_PANDA.jobsArchived4 WHERE PandaID=:PandaID "
                    sqlP += "UNION "
                    sqlP += "SELECT jobStatus FROM ATLAS_PANDAARCH.jobsArchived WHERE PandaID=:PandaID AND modificationTime>CURRENT_DATE-30 "
                    allDone = True
                    for pandaID, in resF:
                        retP, resP = taskBuffer.querySQLS(sqlP, {':PandaID': pandaID})
                        if len(resP) == 0:
                            _logger.debug("skip delete sub {0} PandaID={1} not found".format(name, pandaID))
                            allDone = False
                            break
                        jobStatus = resP[0][0]
                        if jobStatus not in ['finished', 'failed', 'cancelled', 'closed']:
                            _logger.debug("skip delete sub {0} PandaID={1} is active {2}".format(name, pandaID, jobStatus))
                            allDone = False
                            break
                finally:
                    self.proxyLock.release()
                if allDone:
                    _logger.debug("deleting sub %s" % name)
                    try:
                        rucioAPI.eraseDataset(name, grace_period=4)
                        status = True
                    except Exception:
                        errtype, errvalue = sys.exc_info()[:2]
                        out = '{0} {1}'.format(errtype, errvalue)
                        _logger.error('{0} failed to erase with {1}'.format(name, out))
                else:
                    _logger.debug("wait sub %s" % name)
                    continue
            # update dataset row (only rows still in completed/cleanup)
            self.proxyLock.acquire()
            try:
                varMap = {}
                varMap[':vuid'] = vuid
                varMap[':ost1'] = 'completed'
                varMap[':ost2'] = 'cleanup'
                varMap[':newStatus'] = 'deleted'
                taskBuffer.querySQLS("UPDATE ATLAS_PANDA.Datasets SET status=:newStatus,modificationdate=CURRENT_DATE WHERE vuid=:vuid AND status IN (:ost1,:ost2) ", varMap)
            finally:
                self.proxyLock.release()
            _logger.debug("end %s " % name)
    except Exception:
        errStr = traceback.format_exc()
        _logger.error(errStr)
    finally:
        # always deregister from the pool and free the worker slot
        self.pool.remove(self)
        self.lock.release()