import time

from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger


class PilotStreaming:
    def __init__(self):
        self._logger = PandaLogger().getLogger('PilotStreaming')
        return

    def run(self):
        """
        Gets and iterates over ups queues, deciding the job requirements and
        sending these to Harvester via the command interface
        :return:
        """
        # timing
        time_start = time.time()
        self._logger.debug('Start.')

        # get unified pilot streaming (ups) queues
        ups_queues = taskBuffer.ups_get_queues()
        self._logger.debug('UPS queues: {0}'.format(ups_queues))

        # get worker stats
        worker_stats = taskBuffer.ups_load_worker_stats()

        for ups_queue in ups_queues:
            # get the worker and job stats for the queue
            try:
                tmp_worker_stats = worker_stats[ups_queue]
                self._logger.debug('worker_stats for queue {0}: {1}'.format(ups_queue, tmp_worker_stats))
                # tmp_job_stats = job_stats[ups_queue]
            except KeyError:
                # skip queue if no data available
                self._logger.debug('No worker stats for queue {0}'.format(ups_queue))
                continue

            # calculate the new worker distribution for the queue
            new_workers_per_harvester = taskBuffer.ups_new_worker_distribution(ups_queue, tmp_worker_stats)
            self._logger.info('queue: {0}, results: {1}'.format(ups_queue, new_workers_per_harvester))

            # variables for the harvester command
            command = '{0}:{1}'.format('SET_N_WORKERS', ups_queue)
            status = 'new'
            ack_requested = False
            lock_interval = None
            com_interval = None

            # send the command to each harvester instance
            for harvester_id in new_workers_per_harvester:
                params = new_workers_per_harvester[harvester_id]
                taskBuffer.commandToHarvester(harvester_id, command, ack_requested, status,
                                              lock_interval, com_interval, params)

        # timing
        time_stop = time.time()
        self._logger.debug('Done. Pilot streaming took: {0} s'.format(time_stop - time_start))
        return
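# Usage sketch: like the other scripts in this collection, the class above is
# expected to run as a cron entry point against an initialized taskBuffer.
# The init arguments mirror the taskBuffer.init calls used elsewhere in these
# scripts; nDBConnection=1 is an assumption for a single-threaded run.
if __name__ == "__main__":
    from config import panda_config
    taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
    PilotStreaming().run()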
import sys
import datetime
import commands
import threading

from config import panda_config

# initialize cx_Oracle using dummy connection
from taskbuffer.Initializer import initializer
initializer.init()

from dataservice.Merger import Merger
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('runMerger')

_logger.debug("================= start ==================")

# overall timeout value
overallTimeout = 60

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
    for line in out.split('\n'):
        items = line.split()
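# The `commands` module above is Python 2 only and was removed in Python 3.
# A minimal sketch of the same stale-process scan using `subprocess` instead
# (find_stale_pids is an illustrative helper, not part of the original script):
import os
import subprocess

def find_stale_pids(script_name, time_limit):
    # list processes with their start time, forcing UTC timestamps
    out = subprocess.check_output(['ps', 'axo', 'user,pid,lstart,args'],
                                  env=dict(os.environ, TZ='UTC'),
                                  universal_newlines=True)
    stale_pids = []
    for proc_line in out.splitlines():
        if script_name not in proc_line or 'python' not in proc_line:
            continue
        items = proc_line.split()
        if len(items) < 7:
            continue
        # lstart occupies five columns: weekday month day HH:MM:SS year
        started = datetime.datetime(
            *time.strptime(' '.join(items[2:7]), '%a %b %d %H:%M:%S %Y')[:6])
        if started < time_limit:
            stale_pids.append(int(items[1]))
    return stale_pids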
        taskBuffer.insertNetworkMatrixData(data_combined)
        # Do some cleanup of old data
        taskBuffer.deleteOldNetworkData()
        return True
    else:
        return False


if __name__ == "__main__":
    # If no argument, call the basic configurator
    if len(sys.argv) == 1:
        t1 = time.time()
        configurator = Configurator()
        if not configurator.run():
            _logger.critical("Configurator loop FAILED")
        t2 = time.time()
        _logger.debug("Configurator run took {0}s".format(t2 - t1))
    # If --network argument, call the network configurator
    elif len(sys.argv) == 2 and sys.argv[1].lower() == '--network':
        t1 = time.time()
        network_configurator = NetworkConfigurator()
        if not network_configurator.run():
            _logger.critical("Network configurator loop FAILED")
        t2 = time.time()
        _logger.debug("Network configurator run took {0}s".format(t2 - t1))
    else:
        _logger.error("Configurator called with wrong arguments. Use either no arguments or --network")
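# A sketch of the same dispatch written with argparse (illustrative only;
# Configurator and NetworkConfigurator are the classes used above):
import argparse

parser = argparse.ArgumentParser(description='PanDA configurator entry point')
parser.add_argument('--network', action='store_true',
                    help='run the network configurator instead of the basic one')
args = parser.parse_args()
runner = NetworkConfigurator() if args.network else Configurator()
if not runner.run():
    _logger.critical("Configurator loop FAILED")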
# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(seconds=overallTimeout - 180)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
    for line in out.split('\n'):
        items = line.split()
        # owned process
        if not items[0] in ['sm', 'atlpan', 'pansrv', 'root']:  # ['os.getlogin()']: doesn't work in cron
            continue
        # look for python
        if re.search('python', line) == None:
            continue
        # PID
        pid = items[1]
        # start time
        timeM = re.search(r'(\S+\s+\d+ \d+:\d+:\d+ \d+)', line)
        startTime = datetime.datetime(*time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            _logger.debug("old process : %s %s" % (pid, startTime))
            _logger.debug(line)
            commands.getoutput('kill -9 %s' % pid)
except:
    errtype, errvalue = sys.exc_info()[:2]
    _logger.error("kill process : %s %s" % (errtype, errvalue))

# main loop
main()
from jobdispatcher.Watcher import Watcher
from brokerage.SiteMapper import SiteMapper
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('runRebro')

_logger.debug("===================== start =====================")


# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name = ""
        vmSize = ""
        vmRSS = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
                continue
            if line.startswith("VmSize:"):
                vmSize = line.split()[1]  # value reported by the kernel in kB
                continue
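# A compact sketch equivalent to the parsing above: read the whole
# /proc/<pid>/status table into a dict and pick out the fields of interest
# (the kernel reports the memory values in kB; read_proc_status is an
# illustrative helper, not part of the original script):
def read_proc_status(pid):
    values = {}
    with open('/proc/%d/status' % pid) as status_file:
        for status_line in status_file:
            key, _, rest = status_line.partition(':')
            values[key] = rest.strip()
    return values.get('Name'), values.get('VmSize'), values.get('VmRSS')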
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('backupJobArch')

_logger.debug("===================== start =====================")


# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name = ""
        vmSize = ""
        vmRSS = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
                continue
            if line.startswith("VmSize:"):
                vmSize = line.split()[1]  # value reported by the kernel in kB
                continue
import os
import re
import sys
import datetime

from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from brokerage.SiteMapper import SiteMapper

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('prioryMassage')

_logger.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# get usage breakdown
usageBreakDownPerUser = {}
usageBreakDownPerSite = {}
workingGroupList = []
for table in ['ATLAS_PANDA.jobsActive4', 'ATLAS_PANDA.jobsArchived4']:
    varMap = {}
    varMap[':prodSourceLabel'] = 'user'
    if table == 'ATLAS_PANDA.jobsActive4':
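# Sketch: queries in these scripts go through taskBuffer.querySQLS with bind
# variables, e.g. (the SELECT below is illustrative, not the script's real
# per-table query):
sqlUsage = ("SELECT prodUserName,COUNT(*) FROM ATLAS_PANDA.jobsActive4 "
            "WHERE prodSourceLabel=:prodSourceLabel GROUP BY prodUserName")
retUsage, resUsage = taskBuffer.querySQLS(sqlUsage, {':prodSourceLabel': 'user'})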
import os
import re
import sys
from ftplib import FTP

from pandalogger.PandaLogger import PandaLogger

# supported architectures
targetArchs = ['Linux-slc5-gcc4.3.tar.gz', 'Linux-slc5_amd64-gcc4.3.tar.gz']

# destination dir
destDir = '/data/atlpan/srv/var/appdir'

# logger
_logger = PandaLogger().getLogger('copyROOT')

_logger.debug("===================== start =====================")

try:
    # login to root repository
    ftp = FTP('root.cern.ch')
    output = ftp.login()
    _logger.debug(output)
    output = ftp.cwd('root')
    _logger.debug(output)
    # get list
    flist = ftp.nlst()
    # loop over all files
    for tmpFile in flist:
        # skip RC
        if re.search(r'-rc\d\.', tmpFile) != None:
            continue
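# Sketch of how one of the listed tarballs could be fetched into destDir
# (retrbinary is the standard ftplib download call; download_tarball is an
# illustrative helper, not part of the original script):
def download_tarball(ftp_conn, file_name):
    dest_path = os.path.join(destDir, file_name)
    with open(dest_path, 'wb') as dest_file:
        ftp_conn.retrbinary('RETR %s' % file_name, dest_file.write)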
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('deleteJobs')

_logger.debug("===================== start =====================")


# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name = ""
        vmSize = ""
        vmRSS = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
                continue
            if line.startswith("VmSize:"):
                vmSize = line.split()[1]  # value reported by the kernel in kB
                continue
import sys

from config import panda_config

# initialize cx_Oracle using dummy connection
from taskbuffer.Initializer import initializer
initializer.init()

from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('boostUser')

_logger.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

# read the user name from stdin and strip the trailing newline
user = sys.stdin.read()
user = user[:-1]

sql = "UPDATE atlas_panda.%s SET currentPriority=:prio WHERE prodUserName=:uname AND prodSourceLabel IN (:label1,:label2) AND currentPriority<:prio"
varMap = {}
varMap[':prio'] = 4000
varMap[':uname'] = user
varMap[':label1'] = 'user'
varMap[':label2'] = 'panda'
for table in ('jobsactive4', 'jobsdefined4'):
    _logger.debug((sql % table) + str(varMap))
    ret = taskBuffer.querySQLS(sql % table, varMap)
    _logger.debug('ret -> %s' % str(ret))
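# Usage sketch: the script reads the PanDA user name from stdin, so a typical
# invocation looks like
#   echo 'some.user' | python boostUser.py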
from dataservice import DataServiceUtils
from dataservice.Closer import Closer
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('datasetManager')

_logger.debug("===================== start =====================")

# use native DQ2
ddm.useDirectDQ2()


# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name = ""
        vmSize = ""
        vmRSS = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
                continue
passwd = panda_config.dbpasswd

# instantiate DB proxies
proxyS = DBProxy()
proxyS.connect(panda_config.dbhost, panda_config.dbpasswd, panda_config.dbuser, panda_config.dbname)

# time limit for dataset closing
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=7)

# close datasets
while True:
    sql = "SELECT vuid,name,modificationdate FROM Datasets " + \
          "WHERE type='output' AND (status='running' OR status='created' OR status='defined') " + \
          "AND modificationdate<'%s' AND name REGEXP '_sub[[:digit:]]+$'"
    ret, res = proxyS.querySQLS(sql % timeLimit.strftime('%Y-%m-%d %H:%M:%S'))
    _logger.debug("# of dataset : %s" % len(res))
    if len(res) == 0:
        break
    for (vuid, name, modDate) in res:
        _logger.debug("start %s %s" % (modDate, name))
        retF, resF = proxyS.querySQLS("SELECT lfn FROM filesTable4 WHERE destinationDBlock='%s'" % name)
        if retF < 0 or retF == None or retF != len(resF):
            _logger.error("SQL error")
        else:
            # no files in filesTable
            if len(resF) == 0:
                _logger.debug("freeze %s " % name)
                status, out = ddm.dq2.main(['freezeDataset', name])
                if status != 0 or (out.find('Error') != -1 and out.find('DQ2 unknown dataset exception') == -1
                                   and out.find('DQ2 security exception') == -1):
                    _logger.error(out)
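# The queries above interpolate values straight into the SQL string. A sketch
# of the dataset SELECT using a bind variable instead, in the :name style the
# other scripts pass to querySQLS (assuming proxyS.querySQLS accepts the same
# optional variable map as taskBuffer.querySQLS):
sqlBind = ("SELECT vuid,name,modificationdate FROM Datasets "
           "WHERE type='output' AND status IN ('running','created','defined') "
           "AND modificationdate<:modDate AND name REGEXP '_sub[[:digit:]]+$'")
retB, resB = proxyS.querySQLS(sqlBind, {':modDate': timeLimit})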
import sys
import glob
import time
import os.path
import commands
import datetime
import threading

from config import panda_config
from taskbuffer.TaskBuffer import taskBuffer
from brokerage import SiteMapper
from dataservice.EventPicker import EventPicker
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('evpPD2P')

_logger.debug("===================== start =====================")

# overall timeout value
overallTimeout = 300

# prefix of evp files
prefixEVP = 'evp.'

# file pattern of evp files
evpFilePatt = panda_config.cache_dir + '/' + prefixEVP + '*'

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
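# Sketch of how the evp.* request files matched by evpFilePatt are typically
# picked up (glob is already imported above; the EventPicker constructor
# arguments are not shown in this excerpt, so only the scan is sketched):
for evpFile in glob.glob(evpFilePatt):
    _logger.debug('evp file: %s' % evpFile)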
import sys
import datetime
import commands
import threading

from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from dataservice.AdderGen import AdderGen
from brokerage.SiteMapper import SiteMapper
from pandautils import PandaUtils

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('add')

_logger.debug("===================== start =====================")

# overall timeout value
overallTimeout = 20

# current minute
currentMinute = datetime.datetime.utcnow().minute

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
    for line in out.split('\n'):
        items = line.split()
    tasks_to_retag = tasks_filtered_pslabel
    return tasks_to_retag


def retag_tasks(task_id_list):
    """
    change the share for the selected tasks
    """
    destination_gshare = 'Frontier'
    reassign_running = True
    _logger.debug('Reassigning tasks: {0}'.format(task_id_list))
    return_code, return_message = taskBuffer.reassignShare(task_id_list, destination_gshare, reassign_running)
    return return_code, return_message


if __name__ == "__main__":
    # 1. get tasks with frontier failures
    failure_count_by_task = get_frontier_failure_count_by_task()

    # 2. filter out tasks by predefined criteria
    tasks_filtered = filter_tasks(failure_count_by_task)

    # 3. retag the tasks
    if tasks_filtered:
        return_code, return_message = retag_tasks(tasks_filtered)
        _logger.debug('tasks {0} reassigned with: {1}; {2}'.format(tasks_filtered, return_code, return_message))
import os
import re
import sys
import datetime

from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from brokerage.SiteMapper import SiteMapper

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('shareMgr')

_logger.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# number of jobs to be activated per queue
nJobsPerQueue = 50

# priority threshold
prioCutoff = 950

# get high prio jobs without throttling
sql = "SELECT distinct computingSite FROM ATLAS_PANDA.jobsActive4 "
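# Sketch of how the SELECT above is typically completed and executed with
# bind variables (the WHERE clause and label below are illustrative, not the
# script's actual condition):
sql += "WHERE prodSourceLabel=:prodSourceLabel AND currentPriority>=:prioCutoff "
varMap = {':prodSourceLabel': 'managed', ':prioCutoff': prioCutoff}
ret, res = taskBuffer.querySQLS(sql, varMap)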