Пример #1
0
    def killJobs(self, ids, code=None, verbose=False):
        """Kill jobs by forwarding the request to ``Client.killJobs``.

        Normal users can kill only their own jobs. People with the
        production VOMS role can kill any jobs. Running jobs are killed
        when the next heartbeat comes from the pilot; set code=9 if
        running jobs need to be killed immediately.

           args:
               ids: the list of PandaIDs
               code: specify why the jobs are killed
                     2: expire
                     3: aborted
                     4: expire in waiting
                     7: retry by server
                     8: rebrokerage
                     9: force kill
                     50: kill by JEDI
                     91: kill user jobs with prod role
               verbose: set True to see what's going on
           returns:
               status code
                     0: communication with the panda server succeeded
                     255: communication failure
               the list of clouds (or Nones if tasks are not yet assigned)
               NOTE(review): the "list of clouds" wording looks copied from
               another method - confirm what the second value holds here.
        """
        import userinterface.Client as Client
        # NOTE(review): no return statement is visible in this excerpt; the
        # documented return values presumably come from lines that follow.
        s,o = Client.killJobs(ids, code=code, verbose=verbose)
Пример #2
0
    def killJobs(self, ids, code=None, verbose=False):
        """Kill jobs by forwarding the request to ``Client.killJobs``.

        Normal users can kill only their own jobs. People with the
        production VOMS role can kill any jobs. Running jobs are killed
        when the next heartbeat comes from the pilot; set code=9 if
        running jobs need to be killed immediately.

           args:
               ids: the list of PandaIDs
               code: specify why the jobs are killed
                     2: expire
                     3: aborted
                     4: expire in waiting
                     7: retry by server
                     8: rebrokerage
                     9: force kill
                     50: kill by JEDI
                     91: kill user jobs with prod role
               verbose: set True to see what's going on
           returns:
               status code
                     0: communication with the panda server succeeded
                     255: communication failure
               the list of clouds (or Nones if tasks are not yet assigned)
               NOTE(review): the "list of clouds" wording looks copied from
               another method - confirm what the second value holds here.
        """
        import userinterface.Client as Client
        # NOTE(review): no return statement is visible in this excerpt; the
        # documented return values presumably come from lines that follow.
        s, o = Client.killJobs(ids, code=code, verbose=verbose)
Пример #3
0
def killJobs(jobList):
    """Ask the server to kill the given jobs and return its reply.

    Args:
        jobList: job identifiers, handed unchanged to ``Client.killJobs``.

    Returns:
        The second element of the ``(status, output)`` pair returned by
        ``Client.killJobs``. The status is logged but not checked.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and
    # is valid syntax on Python 3 (the bare statement form is not).
    print('Kill jobs')
    _logger.debug('Kill jobs')
    _logger.debug(str(jobList))
    # No kill code is passed, so the callee's default applies. The original
    # note said "Code 3 eqs. aborted status", but code 3 is not sent here.
    status, output = Client.killJobs(jobList)
    _logger.debug(output)
    _logger.debug(status)
    _logger.debug("---------------------")

    return output
Пример #4
0
def killJobs(jobList):
    """Ask the server to kill the given jobs and return its reply.

    Args:
        jobList: job identifiers, handed unchanged to ``Client.killJobs``.

    Returns:
        The second element of the ``(status, output)`` pair returned by
        ``Client.killJobs``. The status is logged but not checked.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and
    # is valid syntax on Python 3 (the bare statement form is not).
    print('Kill jobs')
    _logger.debug('Kill jobs')
    _logger.debug(str(jobList))
    # No kill code is passed, so the callee's default applies. The original
    # note said "Code 3 eqs. aborted status", but code 3 is not sent here.
    status, output = Client.killJobs(jobList)
    _logger.debug(output)
    _logger.debug(status)
    _logger.debug("---------------------")

    return output
Пример #5
0
def main():
    """Kill a hard-coded, inclusive range of PandaIDs and print the replies.

    The ID range is fixed in the function body. Each element of the second
    value returned by ``Client.killJobs`` is printed on its own line, then
    'done' is logged at info level.
    """
    first_id = 4005758
    last_id = 4005758  # inclusive upper bound
    jobs_list = list(range(first_id, last_id + 1))
    # Parenthesised single-argument print works on both Python 2 and 3.
    print(jobs_list)

    status, output = Client.killJobs(jobs_list, srvID=aSrvID)
    for entry in output:
        print(entry)

    logger.info('done')
Пример #6
0
# password
from config import panda_config
passwd = panda_config.dbpasswd

# cloud whose waiting jobs are to be killed (first command-line argument)
cloud = sys.argv[1]

# instantiate DB proxies
proxyS = DBProxy()
proxyS.connect(panda_config.dbhost,panda_config.dbpasswd,panda_config.dbuser,panda_config.dbname)

# Keep querying until the waiting table holds no more jobs for this cloud.
while True:
    # get PandaIDs
    # SECURITY NOTE(review): the cloud name comes straight from the command
    # line and is interpolated into the SQL text; switch to bind variables
    # if querySQL supports them.
    res = proxyS.querySQL("SELECT PandaID FROM jobsWaiting4 WHERE cloud='%s' ORDER BY PandaID" % cloud)
    # escape
    if len(res) == 0:
        break
    # convert the one-column rows to a flat list
    jobs = [pandaID for (pandaID,) in res]
    # kill in batches of nJob, pausing after every batch
    nJob = 300
    for iJob in range(0, len(jobs), nJob):
        subJobs = jobs[iJob:iJob+nJob]
        # parenthesised single-argument print works on Python 2 and 3
        print('killJobs(%s)' % subJobs)
        Client.killJobs(subJobs)
        time.sleep(60)

Пример #7
0
                    jobSpec)
                if not fileCheckInJEDI:
                    jobSpec.jobStatus = 'closed'
                    jobSpec.jobSubStatus = 'cojumbo_wrong'
                    jobSpec.taskBufferErrorCode = taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                taskBuffer.archiveJobs([jobSpec], False, True)
        tmpLog.debug("kill {0} co-jumbo jobs in Waiting".format(
            len(coJumboTokill)))
        if len(coJumboTokill) > 0:
            jediJobs = list(coJumboTokill)
            nJob = 100
            iJob = 0
            while iJob < len(jediJobs):
                tmpLog.debug(' killing %s' % str(jediJobs[iJob:iJob + nJob]))
                Client.killJobs(jediJobs[iJob:iJob + nJob],
                                51,
                                keepUnmerged=True)
                iJob += nJob
except:
    errStr = traceback.format_exc()
    tmpLog.error(errStr)

tmpLog.debug("Fork session")


# thread for fork
class ForkThr(threading.Thread):
    """Thread subclass that carries a file name.

    NOTE(review): only ``__init__`` is visible in this excerpt; how the
    file name is used is defined elsewhere.
    """
    def __init__(self, fileName):
        """Initialise the base ``Thread`` and store ``fileName`` on the instance."""
        threading.Thread.__init__(self)
        self.fileName = fileName
Пример #8
0
jobs = []

# bind variables: task ID plus the inclusive PandaID range from the arguments
varMap = {
    ':prodSourceLabel': 'managed',
    ':taskID': args[0],
    ':pandaIDl': args[1],
    ':pandaIDu': args[2],
}
sql = "SELECT PandaID FROM %s WHERE prodSourceLabel=:prodSourceLabel AND taskID=:taskID AND PandaID BETWEEN :pandaIDl AND :pandaIDu ORDER BY PandaID"
# A set gives O(1) duplicate checks while `jobs` keeps first-seen order.
seenIDs = set()
for table in ['ATLAS_PANDA.jobsActive4','ATLAS_PANDA.jobsWaiting4','ATLAS_PANDA.jobsDefined4']:
    status,res = proxyS.querySQLS(sql % table,varMap)
    if res is None:
        continue
    for (pandaID,) in res:
        if pandaID not in seenIDs:
            seenIDs.add(pandaID)
            jobs.append(pandaID)

# parenthesised single-argument print works on Python 2 and 3
print('The number of jobs to be killed : %s' % len(jobs))
# kill in batches of nJob with a short pause after each request
nJob = 100
for iJob in range(0, len(jobs), nJob):
    subJobs = jobs[iJob:iJob+nJob]
    print('kill %s' % str(subJobs))
    if options.forceKill:
        # code 9 is passed when --forceKill is set
        Client.killJobs(subJobs,9,useMailAsID=useMailAsIDV)
    else:
        Client.killJobs(subJobs,useMailAsID=useMailAsIDV)
    time.sleep(1)
                        

Пример #9
0
import subprocess
import random
from termcolor2 import c

import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec


aSrvID = None

# Only purely numeric command-line arguments are treated as job IDs.
jobs_to_kill = [arg for arg in sys.argv[1:] if arg.isdigit()]

s, o = Client.killJobs(jobs_to_kill, verbose=False)

print("Job killing results:\n=============================")
if s != 0:
    # A non-zero status means the request itself failed, so `o` is
    # presumably not a per-job result list and must not be indexed.
    print(c("request failed (status %s): %s" % (s, o)).red)
else:
    # Pair every requested ID with its result instead of indexing by position.
    for job_id, killed in zip(jobs_to_kill, o):
        if killed:
            print(c("%s: %s" % (job_id, 'success')).green)
        else:
            print(c("%s: %s" % (job_id, 'failed')).red)
Пример #10
0
import sys

import userinterface.Client as Client

# Accept either a single job definition ID or an inclusive start/end range.
if len(sys.argv) < 2:
    # Without this guard the range branch fails with an IndexError.
    print('usage: %s <jobDefinitionID> [<lastJobDefinitionID>]' % sys.argv[0])
    sys.exit(1)
elif len(sys.argv) == 2:
    jobDefIDs = [sys.argv[1]]
else:
    startID = int(sys.argv[1])
    endID = int(sys.argv[2])
    if startID > endID:
        # parenthesised single-argument print works on Python 2 and 3
        print('%d is less than %d' % (endID, startID))
        sys.exit(1)
    # list() keeps a real list on Python 3, where range() is lazy
    jobDefIDs = list(range(startID, endID + 1))

# query PandaID
status, ids = Client.queryPandaIDs(jobDefIDs)

if status != 0:
    # NOTE(review): exits with code 0 even though the query failed; kept
    # as-is because callers may rely on it.
    sys.exit(0)

# remove None entries in a single pass
ids = [pandaID for pandaID in ids if pandaID is not None]

# kill
if len(ids) != 0:
    Client.killJobs(ids)
Пример #11
0
options,args = optP.parse_args()


aSrvID = None

codeV = None
useMailAsIDV = False

# Pick the kill code: explicit flags win over a numeric --codeV value.
if options.forceKill:
    codeV = 9
elif options.killUserJobs:
    codeV = 91
else:
    try:
        codeV = int(options.codeV)
    except (TypeError, ValueError):
        # option absent (None) or not numeric: no code is sent
        codeV = None
if options.killOwnProdJobs:
    useMailAsIDV = True

# One argument is a single ID; two arguments are an inclusive ID range.
if len(args) == 1:
    targetIDs = [args[0]]
else:
    startID = int(args[0])
    endID   = int(args[1])
    if startID > endID:
        # parenthesised single-argument print works on Python 2 and 3
        print('%d is less than %d' % (endID,startID))
        sys.exit(1)
    # list() keeps a real list on Python 3, where range() is lazy
    targetIDs = list(range(startID,endID+1))

Client.killJobs(targetIDs, code=codeV, useMailAsID=useMailAsIDV, keepUnmerged=options.keepUnmerged, jobSubStatus=options.jobSubStatus)

Пример #12
0
    sql = "SELECT PandaID,lockedby FROM ATLAS_PANDA.jobsDefined4 "
else:
    sql = "SELECT PandaID,lockedby FROM ATLAS_PANDA.jobsActive4 "
sql += "WHERE jobStatus=:jobStatus AND computingSite=:computingSite AND modificationTime<:modificationTime AND prodSourceLabel=:prodSourceLabel ORDER BY PandaID"
status, res = proxyS.querySQLS(sql, varMap)

# The original checked for None only after calling len(res), so a None
# result raised TypeError. Normalise once and use `rows` from here on.
rows = res if res is not None else []
print("got {0} jobs".format(len(rows)))

# Split the result: JEDI-locked jobs are killed, the rest are reassigned.
jobs = []
jediJobs = []
for (pandaID, lockedby) in rows:
    if lockedby == 'jedi':
        jediJobs.append(pandaID)
    else:
        jobs.append(pandaID)

# both requests are sent in batches of nJob IDs
nJob = 100
for iJob in range(0, len(jobs), nJob):
    subJobs = jobs[iJob:iJob + nJob]
    print('reassign  %s' % str(subJobs))
    Client.reassignJobs(subJobs)
for iJob in range(0, len(jediJobs), nJob):
    subJobs = jediJobs[iJob:iJob + nJob]
    print('kill JEDI jobs %s' % str(subJobs))
    Client.killJobs(subJobs, 51)
Пример #13
0
import sys

import userinterface.Client as Client

# Accept either a single job definition ID or an inclusive start/end range.
if len(sys.argv) < 2:
    # Without this guard the range branch fails with an IndexError.
    print('usage: %s <jobDefinitionID> [<lastJobDefinitionID>]' % sys.argv[0])
    sys.exit(1)
elif len(sys.argv) == 2:
    jobDefIDs = [sys.argv[1]]
else:
    startID = int(sys.argv[1])
    endID   = int(sys.argv[2])
    if startID > endID:
        # parenthesised single-argument print works on Python 2 and 3
        print('%d is less than %d' % (endID,startID))
        sys.exit(1)
    # list() keeps a real list on Python 3, where range() is lazy
    jobDefIDs = list(range(startID,endID+1))

# query PandaID
status, ids = Client.queryPandaIDs(jobDefIDs)

if status != 0:
    # NOTE(review): exits with code 0 even though the query failed; kept
    # as-is because callers may rely on it.
    sys.exit(0)

# remove None entries in a single pass
ids = [pandaID for pandaID in ids if pandaID is not None]

# kill
if len(ids) != 0:
    Client.killJobs(ids)

Пример #14
0
            jobSpecs = taskBuffer.peekJobs(coJumboW,fromDefined=False,fromActive=False,fromArchived=False,fromWaiting=True)
            for jobSpec in jobSpecs:
                fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(jobSpec)
                if not fileCheckInJEDI:
                    jobSpec.jobStatus = 'closed'
                    jobSpec.jobSubStatus = 'cojumbo_wrong'
                    jobSpec.taskBufferErrorCode = taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                taskBuffer.archiveJobs([jobSpec],False,True)
        tmpLog.debug("kill {0} co-jumbo jobs in Waiting".format(len(coJumboTokill)))
        if len(coJumboTokill) > 0:
            jediJobs = list(coJumboTokill)
            nJob = 100
            iJob = 0
            while iJob < len(jediJobs):
                tmpLog.debug(' killing %s' % str(jediJobs[iJob:iJob+nJob]))
                Client.killJobs(jediJobs[iJob:iJob+nJob],51,keepUnmerged=True)
                iJob += nJob
except:
    errStr = traceback.format_exc()
    tmpLog.error(errStr)


tmpLog.debug("Fork session")
# thread for fork
class ForkThr (threading.Thread):
    """Thread subclass that carries a file name.

    NOTE(review): ``run()`` is cut off in this excerpt, so how the file
    name is used cannot be confirmed from here.
    """
    def __init__(self,fileName):
        """Initialise the base ``Thread`` and store ``fileName`` on the instance."""
        threading.Thread.__init__(self)
        self.fileName = fileName

    def run(self):
        # Shell prefix that sources the panda server sysconfig file; the
        # rest of this method lies outside the visible excerpt.
        setupStr = 'source /etc/sysconfig/panda_server; '
Пример #15
0
import datetime
from taskbuffer.DBProxy import DBProxy
import userinterface.Client as Client

# password
from config import panda_config
passwd = panda_config.dbpasswd

# time limit: jobs not modified within the last day are considered old
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=1)

# instantiate DB proxies
proxyS = DBProxy()
proxyS.connect('adbpro.usatlas.bnl.gov',passwd,'panda-developer','PandaDevDB')

# get PandaIDs from jobsDefined
res = proxyS.querySQL("SELECT PandaID,modificationTime from jobsDefined4 ORDER BY modificationTime")

# kill if old
jobs = [pandaID for (pandaID, modTime) in res if modTime < timeLimit]

# skip the server round-trip when nothing is old enough to kill
if jobs:
    Client.killJobs(jobs)

Пример #16
0
    sql = "SELECT PandaID,lockedby FROM ATLAS_PANDA.jobsActive4 "
sql += "WHERE jobStatus=:jobStatus AND computingSite=:computingSite AND modificationTime<:modificationTime AND prodSourceLabel=:prodSourceLabel ORDER BY PandaID"
status,res = proxyS.querySQLS(sql,varMap)

# The original checked for None only after calling len(res), so a None
# result raised TypeError. Normalise once and use `rows` from here on.
rows = res if res is not None else []
print("got {0} jobs".format(len(rows)))

# Split the result: JEDI-locked jobs are killed, the rest are reassigned.
jobs = []
jediJobs = []
for (pandaID,lockedby) in rows:
    if lockedby == 'jedi':
        jediJobs.append(pandaID)
    else:
        jobs.append(pandaID)

# both requests are sent in batches of nJob IDs
nJob = 100
for iJob in range(0, len(jobs), nJob):
    subJobs = jobs[iJob:iJob+nJob]
    print('reassign  %s' % str(subJobs))
    Client.reassignJobs(subJobs)
for iJob in range(0, len(jediJobs), nJob):
    subJobs = jediJobs[iJob:iJob+nJob]
    print('kill JEDI jobs %s' % str(subJobs))
    Client.killJobs(subJobs,51)


Пример #17
0
# optional third source label extends the IN (...) list built earlier
if options.prodSourceLabel is not None:
    varMap[':src3'] = options.prodSourceLabel
    srcSQL += ',:src3'
srcSQL += ')'

jobs = []
# A set gives O(1) duplicate checks while `jobs` keeps first-seen order.
seenIDs = set()
tables = ['ATLAS_PANDA.jobsActive4','ATLAS_PANDA.jobsWaiting4','ATLAS_PANDA.jobsDefined4']
for table in tables:
    sql = "SELECT PandaID FROM %s WHERE prodUserName=:prodUserName AND prodSourceLabel IN %s " % (table,srcSQL)
    if options.jobID is not None:
        sql += "AND jobDefinitionID=:jobDefinitionID "
    if options.jobsetID not in (None,'all'):
        sql += "AND jobsetID=:jobsetID "
    sql += "ORDER BY PandaID "
    status,res = proxyS.querySQLS(sql,varMap)
    if res is None:
        continue
    for (pandaID,) in res:
        if pandaID not in seenIDs:
            seenIDs.add(pandaID)
            jobs.append(pandaID)
if jobs:
    # kill with code 9 in batches of nJob IDs
    nJob = 1000
    for iJob in range(0, len(jobs), nJob):
        subJobs = jobs[iJob:iJob+nJob]
        # parenthesised single-argument print works on Python 2 and 3
        print("kill %s %s/%s" % (str(subJobs),iJob,len(jobs)))
        Client.killJobs(subJobs,code=9)
else:
    print("no job was killed")

Пример #18
0
srcSQL += ')'

jobs = []
# A set gives O(1) duplicate checks while `jobs` keeps first-seen order.
seenIDs = set()
tables = [
    'ATLAS_PANDA.jobsActive4', 'ATLAS_PANDA.jobsWaiting4',
    'ATLAS_PANDA.jobsDefined4'
]
for table in tables:
    sql = "SELECT PandaID FROM %s WHERE prodUserName=:prodUserName AND prodSourceLabel IN %s " % (
        table, srcSQL)
    if options.jobID is not None:
        sql += "AND jobDefinitionID=:jobDefinitionID "
    if options.jobsetID not in (None, 'all'):
        sql += "AND jobsetID=:jobsetID "
    sql += "ORDER BY PandaID "
    status, res = proxyS.querySQLS(sql, varMap)
    if res is None:
        continue
    for (pandaID,) in res:
        if pandaID not in seenIDs:
            seenIDs.add(pandaID)
            jobs.append(pandaID)
if jobs:
    # kill with code 9 in batches of nJob IDs
    nJob = 1000
    for iJob in range(0, len(jobs), nJob):
        subJobs = jobs[iJob:iJob + nJob]
        # parenthesised single-argument print works on Python 2 and 3
        print("kill %s %s/%s" % (str(subJobs), iJob, len(jobs)))
        Client.killJobs(subJobs, code=9)
else:
    print("no job was killed")
Пример #19
0
# Split the query result: JEDI-locked jobs are killed, the rest reassigned.
if res is not None:
    for (pandaID,lockedby) in res:
        if lockedby == 'jedi':
            jediJobs.append(pandaID)
        else:
            jobs.append(pandaID)

# reassign in batches of nJob, pausing after each request
jobs.sort()
nJob = 100
for iJob in range(0, len(jobs), nJob):
    subJobs = jobs[iJob:iJob+nJob]
    # parenthesised single-argument print works on Python 2 and 3
    print('reassign  %s' % str(subJobs))
    Client.reassignJobs(subJobs)
    time.sleep(10)

# kill JEDI jobs in batches of nJob
for iJob in range(0, len(jediJobs), nJob):
    subJobs = jediJobs[iJob:iJob+nJob]
    print('kill JEDI jobs %s' % str(subJobs))
    Client.killJobs(subJobs,codeV,keepUnmerged=options.keepUnmerged)

# blank separator line (a bare `print` is a silent no-op on Python 3)
print('')
print('reassigned {0} jobs'.format(len(jobs) + len(jediJobs)))


Пример #20
0
# Split the query result: JEDI-locked jobs are killed, the rest reassigned.
if res is not None:
    for (pandaID, lockedby) in res:
        if lockedby == 'jedi':
            jediJobs.append(pandaID)
        else:
            jobs.append(pandaID)

# reassign in batches of nJob, pausing after each request
jobs.sort()
nJob = 100
for iJob in range(0, len(jobs), nJob):
    subJobs = jobs[iJob:iJob + nJob]
    # parenthesised single-argument print works on Python 2 and 3
    print('reassign  %s' % str(subJobs))
    Client.reassignJobs(subJobs)
    time.sleep(10)

# kill JEDI jobs in batches of nJob
for iJob in range(0, len(jediJobs), nJob):
    subJobs = jediJobs[iJob:iJob + nJob]
    print('kill JEDI jobs %s' % str(subJobs))
    Client.killJobs(subJobs,
                    codeV,
                    keepUnmerged=options.keepUnmerged)

# blank separator line (a bare `print` is a silent no-op on Python 3)
print('')
print('reassigned {0} jobs'.format(len(jobs) + len(jediJobs)))
Пример #21
0
                jobsMap[prio] = []
            if not id in jobsMap[prio]:
                jobsMap[prio].append(id)

# order by PandaID and currentPriority
jobs = []
# sorted() works on Python 2 and 3; dict.keys() has no .sort() on Python 3
prioList = sorted(jobsMap)
for prio in prioList:
    # reverse order by PandaID to kill newer jobs
    ids = jobsMap[prio]
    ids.sort(reverse=True)
    jobs += ids

# optionally cap the number of jobs to kill
if options.maxJobs is not None:
    jobs = jobs[:int(options.maxJobs)]

# parenthesised single-argument print works on Python 2 and 3
print('The number of jobs with priorities below %s : %s' % (args[0], len(jobs)))
# kill in batches of nJob with a short pause after each request
nJob = 100
for iJob in range(0, len(jobs), nJob):
    subJobs = jobs[iJob:iJob + nJob]
    print('kill %s' % str(subJobs))
    if options.forceKill:
        # code 9 is passed when --forceKill is set
        Client.killJobs(subJobs, 9)
    else:
        Client.killJobs(subJobs)
    time.sleep(1)
Пример #22
0
                help='kill user jobs using a production role')
options, args = optP.parse_args()

aSrvID = None

codeV = None
useMailAsIDV = False

# Pick the kill code from the command-line flags; None sends no code.
if options.forceKill:
    codeV = 9
elif options.killUserJobs:
    codeV = 91
if options.killOwnProdJobs:
    useMailAsIDV = True

# One argument is a single ID; two arguments are an inclusive ID range.
if len(args) == 1:
    targetIDs = [args[0]]
else:
    startID = int(args[0])
    endID = int(args[1])
    if startID > endID:
        # parenthesised single-argument print works on Python 2 and 3
        print('%d is less than %d' % (endID, startID))
        sys.exit(1)
    # list() keeps a real list on Python 3, where range() is lazy
    targetIDs = list(range(startID, endID + 1))

Client.killJobs(targetIDs,
                code=codeV,
                useMailAsID=useMailAsIDV,
                keepUnmerged=options.keepUnmerged)
Пример #23
0
                default=False,help='kill jobs before next heartbeat is coming')
optP.add_option('--killOwnProdJobs',action='store_const',const=True,dest='killOwnProdJobs',
                default=False,help='kill own production jobs without a production role')
optP.add_option('--killUserJobs',action='store_const',const=True,dest='killUserJobs',
                default=False,help='kill user jobs using a production role')
options,args = optP.parse_args()


aSrvID = None

codeV = None
useMailAsIDV = False

# Pick the kill code from the command-line flags; None sends no code.
if options.forceKill:
    codeV = 9
elif options.killUserJobs:
    codeV = 91
if options.killOwnProdJobs:
    useMailAsIDV = True

# One argument is a single ID; two arguments are an inclusive ID range.
if len(args) == 1:
    targetIDs = [args[0]]
else:
    startID = int(args[0])
    endID   = int(args[1])
    if startID > endID:
        # parenthesised single-argument print works on Python 2 and 3
        print('%d is less than %d' % (endID,startID))
        sys.exit(1)
    # list() keeps a real list on Python 3, where range() is lazy
    targetIDs = list(range(startID,endID+1))

Client.killJobs(targetIDs,code=codeV,useMailAsID=useMailAsIDV)

Пример #24
0
sql = "SELECT PandaID,lockedby FROM ATLAS_PANDA.jobsWaiting4 WHERE jobStatus=:jobStatus AND taskID=:taskID AND modificationTime<:modificationTime "
status, res = proxyS.querySQLS(sql, varMap)
# Split the query result: JEDI-locked jobs are killed, the rest reassigned.
if res is not None:
    for (pandaID, lockedby) in res:
        if lockedby == 'jedi':
            jediJobs.append(pandaID)
        else:
            jobs.append(pandaID)

# reassign in batches of nJob, pausing after each request
jobs.sort()
nJob = 100
for iJob in range(0, len(jobs), nJob):
    subJobs = jobs[iJob:iJob + nJob]
    # parenthesised single-argument print works on Python 2 and 3
    print('reassign  %s' % str(subJobs))
    Client.reassignJobs(subJobs)
    time.sleep(10)

# kill JEDI jobs in batches of nJob
for iJob in range(0, len(jediJobs), nJob):
    subJobs = jediJobs[iJob:iJob + nJob]
    print('kill JEDI jobs %s' % str(subJobs))
    Client.killJobs(subJobs, codeV)

# blank separator line (a bare `print` is a silent no-op on Python 3)
print('')
print('reassigned {0} jobs'.format(len(jobs) + len(jediJobs)))