Exemplo n.º 1
0
        # look for python
        if re.search('python',line) == None:
            continue
        # PID
        pid = items[1]
        # start time
        timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)',line)
        startTime = datetime.datetime(*time.strptime(timeM.group(1),'%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            _logger.debug("old process : %s %s" % (pid,startTime))
            _logger.debug(line)            
            commands.getoutput('kill -9 %s' % pid)
except:
    type, value, traceBack = sys.exc_info()
    _logger.error("kill process : %s %s" % (type,value))

# instantiate PD2P
# NOTE(review): single DB connection; panda_config presumably imported above — confirm
taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)
siteMapper = SiteMapper.SiteMapper(taskBuffer)


# thread pool
class ThreadPool:
    """Lock-protected container of worker threads (definition continues beyond this chunk)."""
    def __init__(self):
        self.lock = threading.Lock()
        # registered thread objects
        self.list = []

    def add(self,obj):
        # register a thread under the lock; the matching release is outside this view
        self.lock.acquire()
        self.list.append(obj)
Exemplo n.º 2
0
    options, args = optP.parse_args()
    try:
        # kill leftover instances of this script older than the overall timeout
        timeLimit = datetime.datetime.utcnow() - datetime.timedelta(seconds=overallTimeout - 180)
        # get process list (TZ=UTC so lstart matches the UTC time limit)
        scriptName = sys.argv[0]
        out = commands.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
        for line in out.split('\n'):
            items = line.split()
            # skip blank lines and processes not owned by service accounts
            # (os.getlogin() doesn't work in cron)
            if not items or items[0] not in ['sm', 'atlpan', 'root']:
                continue
            # keep only python processes
            if re.search('python', line) is None:
                continue
            # PID column of the ps output
            pid = items[1]
            # parse the process start time from the ps lstart field
            timeM = re.search(r'(\S+\s+\d+ \d+:\d+:\d+ \d+)', line)
            if timeM is None:
                continue
            startTime = datetime.datetime(*time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6])
            # kill processes started before the time limit
            if startTime < timeLimit:
                _logger.debug("old process : %s %s" % (pid, startTime))
                _logger.debug(line)
                commands.getoutput('kill -9 %s' % pid)
    except Exception:
        # best-effort cleanup: log and continue
        errtype, errvalue = sys.exc_info()[:2]
        _logger.error("kill process : %s %s" % (errtype, errvalue))
    # main loop
    main()
Exemplo n.º 3
0
        # look for python
        if re.search('python',line) == None:
            continue
        # PID
        pid = items[1]
        # start time
        timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)',line)
        startTime = datetime.datetime(*time.strptime(timeM.group(1),'%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            _logger.debug("old process : %s %s" % (pid,startTime))
            _logger.debug(line)            
            commands.getoutput('kill -9 %s' % pid)
except:
    type, value, traceBack = sys.exc_info()
    _logger.error("kill process : %s %s" % (type,value))
    

# instantiate TB
taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# memory snapshot tagged for this cycle (helper defined elsewhere in the file)
_memoryCheck("rebroker")

# rebrokerage
_logger.debug("Rebrokerage start")

# get timeout value from the config table (units defined by the consumer — TODO confirm)
timeoutVal = taskBuffer.getConfigValue('rebroker','ANALY_TIMEOUT')
Exemplo n.º 4
0
        # look for python
        if re.search('python',line) == None:
            continue
        # PID
        pid = items[1]
        # start time
        timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)',line)
        startTime = datetime.datetime(*time.strptime(timeM.group(1),'%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            _logger.debug("old process : %s %s" % (pid,startTime))
            _logger.debug(line)            
            commands.getoutput('kill -9 %s' % pid)
except:
    type, value, traceBack = sys.exc_info()
    _logger.error("kill process : %s %s" % (type,value))
    

# instantiate TB
taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# memory snapshot tagged for this cycle (helper defined elsewhere in the file)
_memoryCheck("rebroker")

# rebrokerage
_logger.debug("Rebrokerage start")
try:
    # time windows for normal and sort-triggered rebrokerage
    # (try block continues beyond this chunk)
    normalTimeLimit = datetime.datetime.utcnow() - datetime.timedelta(hours=24)
    sortTimeLimit   = datetime.datetime.utcnow() - datetime.timedelta(hours=3)
Exemplo n.º 5
0
# close datasets
# Freeze output "_sub" datasets whose modification date is older than timeLimit.
while True:
    sql = "SELECT vuid,name,modificationdate FROM Datasets " + \
          "WHERE type='output' AND (status='running' OR status='created' OR status='defined') " + \
          "AND modificationdate<'%s' AND name REGEXP '_sub[[:digit:]]+$'"
    ret, res = proxyS.querySQLS(sql % timeLimit.strftime('%Y-%m-%d %H:%M:%S'))
    _logger.debug("# of dataset : %s" % len(res))
    if len(res) == 0:
        break
    for (vuid, name, modDate) in res:
        _logger.debug("start %s %s" % (modDate, name))
        retF, resF = proxyS.querySQLS(
            "SELECT lfn FROM filesTable4 WHERE destinationDBlock='%s'" % name)
        # retF is the row count; a mismatch with resF signals a query failure
        if retF < 0 or retF == None or retF != len(resF):
            _logger.error("SQL error")
        else:
            # no files in filesTable
            if len(resF) == 0:
                _logger.debug("freeze %s " % name)
                status, out = ddm.dq2.main(['freezeDataset', name])
                # tolerate benign DQ2 responses (unknown dataset / security exception)
                if status != 0 or (out.find('Error') != -1 and out.find('DQ2 unknown dataset exception') == -1 \
                                   and out.find('DQ2 security exception') == -1):
                    _logger.error(out)
                else:
                    proxyS.querySQL(
                        "UPDATE Datasets SET status='completed',modificationdate=UTC_TIMESTAMP() WHERE vuid='%s'"
                        % vuid)
            else:
                # files remain: statement continues beyond this chunk
                _logger.debug("wait %s " % name)
                proxyS.querySQL(
Exemplo n.º 6
0
        # look for python
        if re.search('python',line) == None:
            continue
        # PID
        pid = items[1]
        # start time
        timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)',line)
        startTime = datetime.datetime(*time.strptime(timeM.group(1),'%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            _logger.debug("old process : %s %s" % (pid,startTime))
            _logger.debug(line)            
            commands.getoutput('kill -9 %s' % pid)
except:
    type, value, traceBack = sys.exc_info()
    _logger.error("kill process : %s %s" % (type,value))
    

# instantiate TB
taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)


# table names
# archive tables live in the ATLAS_PANDAARCH schema
jobATableName   = "ATLAS_PANDAARCH.jobsArchived"
filesATableName = "ATLAS_PANDAARCH.filesTable_ARCH"
paramATableName = "ATLAS_PANDAARCH.jobParamsTable_ARCH"
metaATableName  = "ATLAS_PANDAARCH.metaTable_ARCH"
Exemplo n.º 7
0
            # store the combined matrix (method head is outside this chunk)
            taskBuffer.insertNetworkMatrixData(data_combined)
            # Do some cleanup of old data
            taskBuffer.deleteOldNetworkData()
            return True
        else:
            # nothing to insert
            return False

if __name__ == "__main__":

    # If no argument, call the basic configurator
    if len(sys.argv) == 1:
        t1 = time.time()
        configurator = Configurator()
        if not configurator.run():
            _logger.critical("Configurator loop FAILED")
        t2 = time.time()
        _logger.debug("Configurator run took {0}s".format(t2 - t1))

    # If --network argument, call the network configurator
    elif len(sys.argv) == 2 and sys.argv[1].lower() == '--network':
        t1 = time.time()
        network_configurator = NetworkConfigurator()
        if not network_configurator.run():
            # name the failing component (message was copy-pasted from the branch above)
            _logger.critical("NetworkConfigurator loop FAILED")
        t2 = time.time()
        # label the timing with its component (message was blank before)
        _logger.debug("NetworkConfigurator run took {0}s".format(t2 - t1))

    else:
        _logger.error("Configurator being called with wrong arguments. Use either no arguments or --network")
Exemplo n.º 8
0
        # look for python
        if re.search('python',line) == None:
            continue
        # PID
        pid = items[1]
        # start time
        timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)',line)
        startTime = datetime.datetime(*time.strptime(timeM.group(1),'%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            _logger.debug("old process : %s %s" % (pid,startTime))
            _logger.debug(line)            
            commands.getoutput('kill -9 %s' % pid)
except:
    type, value, traceBack = sys.exc_info()
    _logger.error("kill process : %s %s" % (type,value))

    
# instantiate TB
taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)

# instantiate sitemapper
aSiteMapper = SiteMapper(taskBuffer)

# delete
_logger.debug("Del session")
status,retSel = taskBuffer.querySQLS("SELECT MAX(PandaID) FROM ATLAS_PANDA.jobsDefined4",{})
if retSel != None:
    try:
        maxID = retSel[0][0]
        _logger.debug("maxID : %s" % maxID)
Exemplo n.º 9
0
                    lsize = os.path.getsize(dstFileName)
                    if lsize == rsize:
                        _logger.debug('skip since alredy there %s' % tmpFile)
                        continue
            # copy
            _logger.debug('copy %s' % tmpFile)
            outFile = open(dstFileName, 'wb')
            ftp.retrbinary('RETR %s' % tmpFile, outFile.write)
            outFile.close()
            _logger.debug('end %s' % tmpFile)
    # quit
    output = ftp.quit()
    _logger.debug(output)
    # make list
    listFileName = 'applist'
    listFilePath = '%s/%s' % (destDir, listFileName)
    listFile = open(listFilePath, 'w')
    for tmpFile in os.listdir(destDir):
        # skip hidden files
        if tmpFile.startswith('.'):
            continue
        # skip applist
        if tmpFile == listFileName:
            continue
        listFile.write('%s\n' % tmpFile)
    listFile.close()
except:
    errType, errValue = sys.exc_info()[:2]
    _logger.error("Failed with %s %s" % (errType, errValue))

_logger.debug("===================== end =====================")
Exemplo n.º 10
0
        # look for python
        if re.search('python',line) == None:
            continue
        # PID
        pid = items[1]
        # start time
        timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)',line)
        startTime = datetime.datetime(*time.strptime(timeM.group(1),'%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            _logger.debug("old dq2 process : %s %s" % (pid,startTime))
            _logger.debug(line)            
            commands.getoutput('kill -9 %s' % pid)
except:
    type, value, traceBack = sys.exc_info()
    _logger.error("kill dq2 process : %s %s" % (type,value))


# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(hours=7)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput('ps axo user,pid,lstart,args | grep %s' % scriptName)
    for line in out.split('\n'):
        items = line.split()
        # owned process
        if not items[0] in ['sm','atlpan','root']: # ['os.getlogin()']: doesn't work in cron
            continue
        # look for python
Exemplo n.º 11
0
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=7)

# close datasets
# Freeze week-old output "_sub" datasets; touch the ones that still have files.
while True:
    sql = "SELECT vuid,name,modificationdate FROM Datasets " + \
          "WHERE type='output' AND (status='running' OR status='created' OR status='defined') " + \
          "AND modificationdate<'%s' AND name REGEXP '_sub[[:digit:]]+$'"
    ret, res = proxyS.querySQLS(sql % timeLimit.strftime('%Y-%m-%d %H:%M:%S'))
    _logger.debug("# of dataset : %s" % len(res))
    if len(res) == 0:
        break
    for vuid, name, modDate in res:
        _logger.debug("start %s %s" % (modDate, name))
        retF, resF = proxyS.querySQLS("SELECT lfn FROM filesTable4 WHERE destinationDBlock='%s'" % name)
        # test None first so a failed query never reaches the None < 0 comparison
        if retF is None or retF < 0 or retF != len(resF):
            _logger.error("SQL error")
        else:
            # no files left in filesTable -> the dataset can be frozen
            if len(resF) == 0:
                _logger.debug("freeze %s " % name)
                status, out = ddm.dq2.main(['freezeDataset', name])
                # tolerate benign DQ2 responses (unknown dataset / security exception)
                if status != 0 or (out.find('Error') != -1 and out.find('DQ2 unknown dataset exception') == -1 \
                                   and out.find('DQ2 security exception') == -1):
                    _logger.error(out)
                else:
                    proxyS.querySQL("UPDATE Datasets SET status='completed',modificationdate=UTC_TIMESTAMP() WHERE vuid='%s'" % vuid)
            else:
                # files remain: bump the modification date and retry in a later cycle
                _logger.debug("wait %s " % name)
                proxyS.querySQL("UPDATE Datasets SET modificationdate=UTC_TIMESTAMP() WHERE vuid='%s'" % vuid)
        _logger.debug("end %s " % name)
        # throttle DB/DQ2 load
        time.sleep(1)
Exemplo n.º 12
0
				# query the job's files (sqlF/varMap are prepared above this chunk)
				stF,resF = taskBuffer.querySQLS(sqlF,varMap)
				if resF == None or len(resF) == 0:
					_logger.debug("  no files")
				else:
					# get lib.tgz and destDBlock
					for lfn,filetype,destinationDBlock in resF:
						if filetype == 'input' and lfn.endswith('.lib.tgz'):
							useLib = True
							libLFN = lfn
							varMap = {}
							varMap[':lfn'] = lfn
							varMap[':type']  = 'output'
							stL,resL = taskBuffer.querySQLS(sqlL,varMap)
							# not found
							if resL == None or len(resL) == 0:
								_logger.error("  cannot find status of %s" % lfn)
								continue
							# check status
							guid,outFileStatus,pandaIDOutLibTgz,tmpLibDsName = resL[0]
							_logger.debug("  PandaID:%s produces %s:%s GUID=%s status=%s" % (pandaIDOutLibTgz,tmpLibDsName,lfn,guid,outFileStatus))
							libStatus = outFileStatus
							libGUID   = guid
							libDSName = tmpLibDsName
						elif filetype in ['log','output']:
							# a "_sub" destination block marks the destination as ready
							if destinationDBlock != None and re.search('_sub\d+$',destinationDBlock) != None:
								destReady = True
					break
			_logger.debug("  useLib:%s libStatus:%s libDsName:%s libLFN:%s libGUID:%s destReady:%s" % (useLib,libStatus,libDSName,libLFN,libGUID,destReady))
			if libStatus == 'failed':
				# delete downstream jobs
				_logger.debug("  -> delete downstream jobs")
Exemplo n.º 13
0
                        _logger.debug('skip since alredy there %s' % tmpFile)                        
                        continue
            # copy
            _logger.debug('copy %s' % tmpFile)
            outFile = open(dstFileName,'wb')
            ftp.retrbinary('RETR %s' % tmpFile,outFile.write)
            outFile.close()
            _logger.debug('end %s' % tmpFile)
    # quit        
    output = ftp.quit()
    _logger.debug(output)
    # make list
    listFileName = 'applist'
    listFilePath = '%s/%s' % (destDir,listFileName)
    listFile = open(listFilePath,'w')
    for tmpFile in os.listdir(destDir):
        # skip hidden files
        if tmpFile.startswith('.'):
            continue
        # skip applist
        if tmpFile == listFileName:
            continue
        listFile.write('%s\n' % tmpFile)
    listFile.close()    
except:
    errType,errValue = sys.exc_info()[:2]
    _logger.error("Failed with %s %s" % (errType,errValue))


_logger.debug("===================== end =====================")
Exemplo n.º 14
0
        if re.search('python', line) == None:
            continue
        # PID
        pid = items[1]
        # start time
        timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)', line)
        startTime = datetime.datetime(
            *time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            _logger.debug("old process : %s %s" % (pid, startTime))
            _logger.debug(line)
            commands.getoutput('kill -9 %s' % pid)
except:
    type, value, traceBack = sys.exc_info()
    _logger.error("kill process : %s %s" % (type, value))

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# table names
jobATableName = "ATLAS_PANDAARCH.jobsArchived"
filesATableName = "ATLAS_PANDAARCH.filesTable_ARCH"
paramATableName = "ATLAS_PANDAARCH.jobParamsTable_ARCH"
metaATableName = "ATLAS_PANDAARCH.metaTable_ARCH"

# time limit
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=3)
Exemplo n.º 15
0
            # store the combined matrix (method head is outside this chunk)
            taskBuffer.insertNetworkMatrixData(data_combined)
            # Do some cleanup of old data
            taskBuffer.deleteOldNetworkData()
            return True
        else:
            # nothing to insert
            return False

if __name__ == "__main__":

    # If no argument, call the basic configurator
    if len(sys.argv) == 1:
        t1 = time.time()
        configurator = Configurator()
        if not configurator.run():
            _logger.critical("Configurator loop FAILED")
        t2 = time.time()
        _logger.debug("Configurator run took {0}s".format(t2 - t1))

    # If --network argument, call the network configurator
    elif len(sys.argv) == 2 and sys.argv[1].lower() == '--network':
        t1 = time.time()
        network_configurator = NetworkConfigurator()
        if not network_configurator.run():
            # name the failing component (message was copy-pasted from the branch above)
            _logger.critical("NetworkConfigurator loop FAILED")
        t2 = time.time()
        # label the timing with its component (message was blank before)
        _logger.debug("NetworkConfigurator run took {0}s".format(t2 - t1))

    else:
        _logger.error("Configurator being called with wrong arguments. Use either no arguments or --network")
Exemplo n.º 16
0
        if re.search('python', line) == None:
            continue
        # PID
        pid = items[1]
        # start time
        timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)', line)
        startTime = datetime.datetime(
            *time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            _logger.debug("old process : %s %s" % (pid, startTime))
            _logger.debug(line)
            commands.getoutput('kill -9 %s' % pid)
except:
    type, value, traceBack = sys.exc_info()
    _logger.error("kill process : %s %s" % (type, value))

# time limit
# three windows used by the processing loops below; exact semantics of U/L/X
# are defined by the consumers outside this view — TODO confirm
timeLimitU = datetime.datetime.utcnow() - datetime.timedelta(minutes=5)
timeLimitL = datetime.datetime.utcnow() - datetime.timedelta(hours=12)
timeLimitX = datetime.datetime.utcnow() - datetime.timedelta(hours=6)

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)


# thread pool
class ThreadPool:
    """Lock-protected container of worker threads (definition continues beyond this chunk)."""
    def __init__(self):
        self.lock = threading.Lock()
        # registered thread objects
        self.list = []
Exemplo n.º 17
0
            seconds=overallTimeout - 180)
        # get process list
        scriptName = sys.argv[0]
        out = commands.getoutput(
            'env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
        for line in out.split('\n'):
            items = line.split()
            # owned process
            if not items[0] in ['sm', 'atlpan', 'pansrv', 'root'
                                ]:  # ['os.getlogin()']: doesn't work in cron
                continue
            # look for python
            if re.search('python', line) == None:
                continue
            # PID
            pid = items[1]
            # start time
            timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)', line)
            startTime = datetime.datetime(
                *time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6])
            # kill old process
            if startTime < timeLimit:
                _logger.debug("old process : %s %s" % (pid, startTime))
                _logger.debug(line)
                commands.getoutput('kill -9 %s' % pid)
    except:
        errtype, errvalue = sys.exc_info()[:2]
        _logger.error("kill process : %s %s" % (errtype, errvalue))
    # main loop
    main()
Exemplo n.º 18
0
        if re.search('python', line) == None:
            continue
        # PID
        pid = items[1]
        # start time
        timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)', line)
        startTime = datetime.datetime(
            *time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6])
        # kill old process
        if startTime < timeLimit:
            _logger.debug("old process : %s %s" % (pid, startTime))
            _logger.debug(line)
            commands.getoutput('kill -9 %s' % pid)
except:
    type, value, traceBack = sys.exc_info()
    _logger.error("kill process : %s %s" % (type, value))

# instantiate PD2P
# NOTE(review): single DB connection; panda_config presumably imported above — confirm
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
siteMapper = SiteMapper.SiteMapper(taskBuffer)


# thread pool
class ThreadPool:
    """Lock-protected container of worker threads (definition continues beyond this chunk)."""
    def __init__(self):
        self.lock = threading.Lock()
        # registered thread objects
        self.list = []

    def add(self, obj):
        # register a thread under the lock; the matching release is outside this view
        self.lock.acquire()
        self.list.append(obj)