示例#1
0
    def killJobs(self, ids, code=None, verbose=False):
        """Kill jobs. Normal users can kill only their own jobs.
        People with production VOMS role can kill any jobs.
        Running jobs are killed when next heartbeat comes from the pilot.
        Set code=9 if running jobs need to be killed immediately.

           args:
               ids: the list of PandaIDs
               code: specify why the jobs are killed
                     2: expire
                     3: aborted
                     4: expire in waiting
                     7: retry by server
                     8: rebrokerage
                     9: force kill
                     50: kill by JEDI
                     91: kill user jobs with prod role
               verbose: set True to see what's going on
           returns:
               status code
                     0: communication succeeded to the panda server
                     255: communication failure
               the list of clouds (or Nones if tasks are not yet assigned)
        """
        import userinterface.Client as Client
        s,o = Client.killJobs(ids, code=code, verbose=verbose)
示例#2
0
    def killJobs(self, ids, code=None, verbose=False):
        """Kill jobs. Normal users can kill only their own jobs.
        People with production VOMS role can kill any jobs.
        Running jobs are killed when next heartbeat comes from the pilot.
        Set code=9 if running jobs need to be killed immediately.

           args:
               ids: the list of PandaIDs
               code: specify why the jobs are killed
                     2: expire
                     3: aborted
                     4: expire in waiting
                     7: retry by server
                     8: rebrokerage
                     9: force kill
                     50: kill by JEDI
                     91: kill user jobs with prod role
               verbose: set True to see what's going on
           returns:
               status code
                     0: communication succeeded to the panda server
                     255: communication failure
               the list of clouds (or Nones if tasks are not yet assigned)
        """
        import userinterface.Client as Client
        s, o = Client.killJobs(ids, code=code, verbose=verbose)
示例#3
0
def killJobs(jobList):
    print 'Kill jobs'
    _logger.debug('Kill jobs')
    _logger.debug(str(jobList))
    s,o = Client.killJobs(jobList) # Code 3 eqs. aborted status
    _logger.debug(o)
    _logger.debug(s)
    _logger.debug("---------------------")

    return o
示例#4
0
def killJobs(jobList):
    print 'Kill jobs'
    _logger.debug('Kill jobs')
    _logger.debug(str(jobList))
    s, o = Client.killJobs(jobList)  # Code 3 eqs. aborted status
    _logger.debug(o)
    _logger.debug(s)
    _logger.debug("---------------------")

    return o
示例#5
0
def main():
    i = 4005758
    jobs_list = []
    while i <= 4005758:
        jobs_list.append(i)
        i+= 1
    print jobs_list
    
    s,o = Client.killJobs(jobs_list,srvID=aSrvID)
    for x in o:
        print x
    
    logger.info('done')
# password
from config import panda_config
passwd = panda_config.dbpasswd

cloud = sys.argv[1]

# instantiate DB proxies
proxyS = DBProxy()
proxyS.connect(panda_config.dbhost,panda_config.dbpasswd,panda_config.dbuser,panda_config.dbname)

while True:
    # get PandaIDs
    res = proxyS.querySQL("SELECT PandaID FROM jobsWaiting4 WHERE cloud='%s' ORDER BY PandaID" % cloud)
    # escape
    if len(res) == 0:
        break
    # convert to list
    jobs = []
    for id, in res:
        jobs.append(id)
    # reassign
    nJob = 300
    iJob = 0
    while iJob < len(jobs):
        print 'killJobs(%s)' % jobs[iJob:iJob+nJob]
        Client.killJobs(jobs[iJob:iJob+nJob])
        iJob += nJob
        time.sleep(60)

示例#7
0
                    jobSpec)
                if not fileCheckInJEDI:
                    jobSpec.jobStatus = 'closed'
                    jobSpec.jobSubStatus = 'cojumbo_wrong'
                    jobSpec.taskBufferErrorCode = taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                taskBuffer.archiveJobs([jobSpec], False, True)
        tmpLog.debug("kill {0} co-jumbo jobs in Waiting".format(
            len(coJumboTokill)))
        if len(coJumboTokill) > 0:
            jediJobs = list(coJumboTokill)
            nJob = 100
            iJob = 0
            while iJob < len(jediJobs):
                tmpLog.debug(' killing %s' % str(jediJobs[iJob:iJob + nJob]))
                Client.killJobs(jediJobs[iJob:iJob + nJob],
                                51,
                                keepUnmerged=True)
                iJob += nJob
except:
    errStr = traceback.format_exc()
    tmpLog.error(errStr)

tmpLog.debug("Fork session")


# thread for fork
class ForkThr(threading.Thread):
    def __init__(self, fileName):
        threading.Thread.__init__(self)
        self.fileName = fileName
jobs = []

varMap = {}
varMap[':prodSourceLabel']  = 'managed'
varMap[':taskID']   = args[0]
varMap[':pandaIDl'] = args[1]
varMap[':pandaIDu'] = args[2]
sql = "SELECT PandaID FROM %s WHERE prodSourceLabel=:prodSourceLabel AND taskID=:taskID AND PandaID BETWEEN :pandaIDl AND :pandaIDu ORDER BY PandaID"
for table in ['ATLAS_PANDA.jobsActive4','ATLAS_PANDA.jobsWaiting4','ATLAS_PANDA.jobsDefined4']:
    status,res = proxyS.querySQLS(sql % table,varMap)
    if res != None:
        for id, in res:
            if not id in jobs:
                jobs.append(id)

print 'The number of jobs to be killed : %s' % len(jobs)            
if len(jobs):
    nJob = 100
    iJob = 0
    while iJob < len(jobs):
        print 'kill %s' % str(jobs[iJob:iJob+nJob])
        if options.forceKill:
            Client.killJobs(jobs[iJob:iJob+nJob],9,useMailAsID=useMailAsIDV)
        else:
            Client.killJobs(jobs[iJob:iJob+nJob],useMailAsID=useMailAsIDV)
        iJob += nJob
        time.sleep(1)
                        

import subprocess
import random
from termcolor2 import c

import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec


aSrvID = None

#for idx,argv in enumerate(sys.argv):
#    if argv == '-s':
#        aSrvID = sys.argv[idx+1]
#        sys.argv = sys.argv[:idx]
#        break

jobs_to_kill = []
for j in sys.argv[1:]:
    if j.isdigit():
        jobs_to_kill.append(j)

s, o = Client.killJobs(jobs_to_kill, verbose=False )

print("Job killing results:\n=============================")
for i in range(len(jobs_to_kill)):
    if o[i]:
        print(c("%s: %s" % (jobs_to_kill[i], 'success' if o[i] else 'failed')).green)
    else:
        print(c("%s: %s" % (jobs_to_kill[i], 'success' if o[i] else 'failed')).red)
示例#10
0
import sys

import userinterface.Client as Client

if len(sys.argv) == 2:
    jobDefIDs = [sys.argv[1]]
else:
    startID = int(sys.argv[1])
    endID = int(sys.argv[2])
    if startID > endID:
        print '%d is less than %d' % (endID, startID)
        sys.exit(1)
    jobDefIDs = range(startID, endID + 1)

# quesry PandaID
status, ids = Client.queryPandaIDs(jobDefIDs)

if status != 0:
    sys.exit(0)

# remove None
while True:
    if not None in ids:
        break
    ids.remove(None)

# kill
if len(ids) != 0:
    Client.killJobs(ids)
示例#11
0
options,args = optP.parse_args()


aSrvID = None

codeV = None
useMailAsIDV = False

if options.forceKill:
    codeV = 9
elif options.killUserJobs:
    codeV = 91
else:
    try:
        codeV = int(options.codeV)
    except Exception:
        pass
if options.killOwnProdJobs:
    useMailAsIDV = True

if len(args) == 1:
    Client.killJobs([args[0]], code=codeV, useMailAsID=useMailAsIDV, keepUnmerged=options.keepUnmerged, jobSubStatus=options.jobSubStatus)
else:
    startID = int(args[0])
    endID   = int(args[1])
    if startID > endID:
        print '%d is less than %d' % (endID,startID)
        sys.exit(1)
    Client.killJobs(range(startID,endID+1),code=codeV,useMailAsID=useMailAsIDV,keepUnmerged=options.keepUnmerged, jobSubStatus=options.jobSubStatus)

示例#12
0
    sql = "SELECT PandaID,lockedby FROM ATLAS_PANDA.jobsDefined4 "
else:
    sql = "SELECT PandaID,lockedby FROM ATLAS_PANDA.jobsActive4 "
sql += "WHERE jobStatus=:jobStatus AND computingSite=:computingSite AND modificationTime<:modificationTime AND prodSourceLabel=:prodSourceLabel ORDER BY PandaID"
status, res = proxyS.querySQLS(sql, varMap)

print "got {0} jobs".format(len(res))

jobs = []
jediJobs = []
if res != None:
    for (id, lockedby) in res:
        if lockedby == 'jedi':
            jediJobs.append(id)
        else:
            jobs.append(id)
if len(jobs):
    nJob = 100
    iJob = 0
    while iJob < len(jobs):
        print 'reassign  %s' % str(jobs[iJob:iJob + nJob])
        Client.reassignJobs(jobs[iJob:iJob + nJob])
        iJob += nJob
if len(jediJobs) != 0:
    nJob = 100
    iJob = 0
    while iJob < len(jediJobs):
        print 'kill JEDI jobs %s' % str(jediJobs[iJob:iJob + nJob])
        Client.killJobs(jediJobs[iJob:iJob + nJob], 51)
        iJob += nJob
示例#13
0
import sys

import userinterface.Client as Client

if len(sys.argv) == 2:
    jobDefIDs = [sys.argv[1]]
else:
    startID = int(sys.argv[1])
    endID   = int(sys.argv[2])
    if startID > endID:
        print '%d is less than %d' % (endID,startID)
        sys.exit(1)
    jobDefIDs = range(startID,endID+1)
    
# quesry PandaID
status, ids = Client.queryPandaIDs(jobDefIDs)

if status != 0:
    sys.exit(0)
    
# remove None
while True:
    if not None in ids:
        break
    ids.remove(None)

# kill
if len(ids) != 0:
    Client.killJobs(ids)

示例#14
0
            jobSpecs = taskBuffer.peekJobs(coJumboW,fromDefined=False,fromActive=False,fromArchived=False,fromWaiting=True)
            for jobSpec in jobSpecs:
                fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(jobSpec)
                if not fileCheckInJEDI:
                    jobSpec.jobStatus = 'closed'
                    jobSpec.jobSubStatus = 'cojumbo_wrong'
                    jobSpec.taskBufferErrorCode = taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                taskBuffer.archiveJobs([jobSpec],False,True)
        tmpLog.debug("kill {0} co-jumbo jobs in Waiting".format(len(coJumboTokill)))
        if len(coJumboTokill) > 0:
            jediJobs = list(coJumboTokill)
            nJob = 100
            iJob = 0
            while iJob < len(jediJobs):
                tmpLog.debug(' killing %s' % str(jediJobs[iJob:iJob+nJob]))
                Client.killJobs(jediJobs[iJob:iJob+nJob],51,keepUnmerged=True)
                iJob += nJob
except:
    errStr = traceback.format_exc()
    tmpLog.error(errStr)


tmpLog.debug("Fork session")
# thread for fork
class ForkThr (threading.Thread):
    def __init__(self,fileName):
        threading.Thread.__init__(self)
        self.fileName = fileName

    def run(self):
        setupStr = 'source /etc/sysconfig/panda_server; '
示例#15
0
import datetime
from taskbuffer.DBProxy import DBProxy
import userinterface.Client as Client

# password
from config import panda_config
passwd = panda_config.dbpasswd

# time limit
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=1)

# instantiate DB proxies
proxyS = DBProxy()
proxyS.connect('adbpro.usatlas.bnl.gov',passwd,'panda-developer','PandaDevDB')

# get PandaIDs from jobsDefined
res = proxyS.querySQL("SELECT PandaID,modificationTime from jobsDefined4 ORDER BY modificationTime")

# kill f old
jobs=[]
for (id,modTime) in res:
    if modTime < timeLimit:
        jobs.append(id)

Client.killJobs(jobs)

示例#16
0
    sql = "SELECT PandaID,lockedby FROM ATLAS_PANDA.jobsActive4 "
sql += "WHERE jobStatus=:jobStatus AND computingSite=:computingSite AND modificationTime<:modificationTime AND prodSourceLabel=:prodSourceLabel ORDER BY PandaID"
status,res = proxyS.querySQLS(sql,varMap)

print "got {0} jobs".format(len(res))

jobs = []
jediJobs = []
if res != None:
    for (id,lockedby) in res:
        if lockedby == 'jedi':
            jediJobs.append(id)
        else:
            jobs.append(id)
if len(jobs):
    nJob = 100
    iJob = 0
    while iJob < len(jobs):
        print 'reassign  %s' % str(jobs[iJob:iJob+nJob])
        Client.reassignJobs(jobs[iJob:iJob+nJob])
        iJob += nJob
if len(jediJobs) != 0:
    nJob = 100
    iJob = 0
    while iJob < len(jediJobs):
        print 'kill JEDI jobs %s' % str(jediJobs[iJob:iJob+nJob])
        Client.killJobs(jediJobs[iJob:iJob+nJob],51)
        iJob += nJob


示例#17
0
if options.prodSourceLabel != None:
    varMap[':src3'] = options.prodSourceLabel
    srcSQL += ',:src3'
srcSQL += ')'

jobs = []
tables = ['ATLAS_PANDA.jobsActive4','ATLAS_PANDA.jobsWaiting4','ATLAS_PANDA.jobsDefined4']
for table in tables:
    sql = "SELECT PandaID FROM %s WHERE prodUserName=:prodUserName AND prodSourceLabel IN %s " % (table,srcSQL)
    if options.jobID != None:
        sql += "AND jobDefinitionID=:jobDefinitionID "
    if not options.jobsetID in (None,'all'):
        sql += "AND jobsetID=:jobsetID "
    sql += "ORDER BY PandaID "
    status,res = proxyS.querySQLS(sql,varMap)
    if res != None:
        for id, in res:
            if not id in jobs:
                jobs.append(id)
if len(jobs):
    iJob = 0
    nJob = 1000
    while iJob < len(jobs):
        subJobs = jobs[iJob:iJob+nJob]
        print "kill %s %s/%s" % (str(subJobs),iJob,len(jobs))
        Client.killJobs(subJobs,code=9)
        iJob += nJob 
else:
    print "no job was killed" 

示例#18
0
srcSQL += ')'

jobs = []
tables = [
    'ATLAS_PANDA.jobsActive4', 'ATLAS_PANDA.jobsWaiting4',
    'ATLAS_PANDA.jobsDefined4'
]
for table in tables:
    sql = "SELECT PandaID FROM %s WHERE prodUserName=:prodUserName AND prodSourceLabel IN %s " % (
        table, srcSQL)
    if options.jobID != None:
        sql += "AND jobDefinitionID=:jobDefinitionID "
    if not options.jobsetID in (None, 'all'):
        sql += "AND jobsetID=:jobsetID "
    sql += "ORDER BY PandaID "
    status, res = proxyS.querySQLS(sql, varMap)
    if res != None:
        for id, in res:
            if not id in jobs:
                jobs.append(id)
if len(jobs):
    iJob = 0
    nJob = 1000
    while iJob < len(jobs):
        subJobs = jobs[iJob:iJob + nJob]
        print "kill %s %s/%s" % (str(subJobs), iJob, len(jobs))
        Client.killJobs(subJobs, code=9)
        iJob += nJob
else:
    print "no job was killed"
示例#19
0
if res != None:
    for (id,lockedby) in res:
        if lockedby == 'jedi':
            jediJobs.append(id)
        else:
            jobs.append(id)

# reassign
jobs.sort()
if len(jobs):
    nJob = 100
    iJob = 0
    while iJob < len(jobs):
        print 'reassign  %s' % str(jobs[iJob:iJob+nJob])
        Client.reassignJobs(jobs[iJob:iJob+nJob])
        iJob += nJob
        time.sleep(10)

if len(jediJobs) != 0:
    nJob = 100
    iJob = 0
    while iJob < len(jediJobs):
        print 'kill JEDI jobs %s' % str(jediJobs[iJob:iJob+nJob])
        Client.killJobs(jediJobs[iJob:iJob+nJob],codeV,keepUnmerged=options.keepUnmerged)
        iJob += nJob

print
print 'reassigned {0} jobs'.format(len(jobs+jediJobs))


示例#20
0
if res != None:
    for (id, lockedby) in res:
        if lockedby == 'jedi':
            jediJobs.append(id)
        else:
            jobs.append(id)

# reassign
jobs.sort()
if len(jobs):
    nJob = 100
    iJob = 0
    while iJob < len(jobs):
        print 'reassign  %s' % str(jobs[iJob:iJob + nJob])
        Client.reassignJobs(jobs[iJob:iJob + nJob])
        iJob += nJob
        time.sleep(10)

if len(jediJobs) != 0:
    nJob = 100
    iJob = 0
    while iJob < len(jediJobs):
        print 'kill JEDI jobs %s' % str(jediJobs[iJob:iJob + nJob])
        Client.killJobs(jediJobs[iJob:iJob + nJob],
                        codeV,
                        keepUnmerged=options.keepUnmerged)
        iJob += nJob

print
print 'reassigned {0} jobs'.format(len(jobs + jediJobs))
                jobsMap[prio] = []
            if not id in jobsMap[prio]:
                jobsMap[prio].append(id)

# order by PandaID and currentPriority
jobs = []
prioList = jobsMap.keys()
prioList.sort()
for prio in prioList:
    # reverse order by PandaID to kill newer jobs
    ids = jobsMap[prio]
    ids.sort()
    ids.reverse()
    jobs += ids

if options.maxJobs != None:
    jobs = jobs[:int(options.maxJobs)]

print 'The number of jobs with priorities below %s : %s' % (args[0], len(jobs))
if len(jobs):
    nJob = 100
    iJob = 0
    while iJob < len(jobs):
        print 'kill %s' % str(jobs[iJob:iJob + nJob])
        if options.forceKill:
            Client.killJobs(jobs[iJob:iJob + nJob], 9)
        else:
            Client.killJobs(jobs[iJob:iJob + nJob])
        iJob += nJob
        time.sleep(1)
示例#22
0
                help='kill user jobs using a production role')
options, args = optP.parse_args()

aSrvID = None

codeV = None
useMailAsIDV = False

if options.forceKill:
    codeV = 9
elif options.killUserJobs:
    codeV = 91
if options.killOwnProdJobs:
    useMailAsIDV = True

if len(args) == 1:
    Client.killJobs([args[0]],
                    code=codeV,
                    useMailAsID=useMailAsIDV,
                    keepUnmerged=options.keepUnmerged)
else:
    startID = int(args[0])
    endID = int(args[1])
    if startID > endID:
        print '%d is less than %d' % (endID, startID)
        sys.exit(1)
    Client.killJobs(range(startID, endID + 1),
                    code=codeV,
                    useMailAsID=useMailAsIDV,
                    keepUnmerged=options.keepUnmerged)
示例#23
0
                default=False,help='kill jobs before next heartbeat is coming')
optP.add_option('--killOwnProdJobs',action='store_const',const=True,dest='killOwnProdJobs',
                default=False,help='kill own production jobs without a production role')
optP.add_option('--killUserJobs',action='store_const',const=True,dest='killUserJobs',
                default=False,help='kill user jobs using a production role')
options,args = optP.parse_args()


aSrvID = None

codeV = None
useMailAsIDV = False

if options.forceKill:
    codeV = 9
elif options.killUserJobs:
    codeV = 91
if options.killOwnProdJobs:
    useMailAsIDV = True

if len(args) == 1:
    Client.killJobs([args[0]],code=codeV,useMailAsID=useMailAsIDV)
else:
    startID = int(args[0])
    endID   = int(args[1])
    if startID > endID:
        print '%d is less than %d' % (endID,startID)
        sys.exit(1)
    Client.killJobs(range(startID,endID+1),code=codeV,useMailAsID=useMailAsIDV)

示例#24
0
sql = "SELECT PandaID,lockedby FROM ATLAS_PANDA.jobsWaiting4 WHERE jobStatus=:jobStatus AND taskID=:taskID AND modificationTime<:modificationTime "
status, res = proxyS.querySQLS(sql, varMap)
if res != None:
    for (id, lockedby) in res:
        if lockedby == 'jedi':
            jediJobs.append(id)
        else:
            jobs.append(id)

# reassign
jobs.sort()
if len(jobs):
    nJob = 100
    iJob = 0
    while iJob < len(jobs):
        print 'reassign  %s' % str(jobs[iJob:iJob + nJob])
        Client.reassignJobs(jobs[iJob:iJob + nJob])
        iJob += nJob
        time.sleep(10)

if len(jediJobs) != 0:
    nJob = 100
    iJob = 0
    while iJob < len(jediJobs):
        print 'kill JEDI jobs %s' % str(jediJobs[iJob:iJob + nJob])
        Client.killJobs(jediJobs[iJob:iJob + nJob], codeV)
        iJob += nJob

print
print 'reassigned {0} jobs'.format(len(jobs + jediJobs))