Example #1
class PilotStreaming:
    def __init__(self):
        self._logger = PandaLogger().getLogger('PilotStreaming')
        return

    def run(self):
        """
        Gets and iterates over ups queues, deciding the job requirements and sending these to Harvester
        via the command interface
        :return:
        """

        # timing
        time_start = time.time()
        self._logger.debug('Start.')

        # get unified pilot streaming (ups) queues
        ups_queues = taskBuffer.ups_get_queues()
        self._logger.debug('UPS queues: {0}'.format(ups_queues))

        # get worker stats
        worker_stats = taskBuffer.ups_load_worker_stats()

        for ups_queue in ups_queues:
            # get the worker and job stats for the queue
            try:
                tmp_worker_stats = worker_stats[ups_queue]
                self._logger.debug('worker_stats for queue {0}: {1}'.format(ups_queue, tmp_worker_stats))
                # tmp_job_stats = job_stats[ups_queue]
            except KeyError:
                # skip queue if no data available
                self._logger.debug('No worker stats for queue {0}'.format(ups_queue))
                continue

            new_workers_per_harvester = taskBuffer.ups_new_worker_distribution(ups_queue, tmp_worker_stats)
            self._logger.info('queue: {0}, results: {1}'.format(ups_queue, new_workers_per_harvester))

            # variables for the harvester command
            command = '{0}:{1}'.format('SET_N_WORKERS', ups_queue)
            status = 'new'
            ack_requested = False
            lock_interval = None
            com_interval = None

            for harvester_id in new_workers_per_harvester:
                params = new_workers_per_harvester[harvester_id]
                taskBuffer.commandToHarvester(harvester_id, command, ack_requested, status,
                                              lock_interval, com_interval, params)

        # timing
        time_stop = time.time()
        self._logger.debug('Done. Pilot streaming took: {0} s'.format(time_stop - time_start))

        return
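
The class above assumes module-level names that the fragment does not show. A minimal driver sketch, using the import paths that appear in the other examples on this page and the single-connection taskBuffer.init() call from Example #19:

# Driver sketch (not part of the original fragment): wires up the
# module-level names PilotStreaming relies on and runs one pass.
import time  # used by PilotStreaming.run()

from config import panda_config
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger

if __name__ == '__main__':
    # one DB connection, as in Example #19
    taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
    PilotStreaming().run()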
Example #2
class PilotStreaming:
    def __init__(self):
        self._logger = PandaLogger().getLogger('PilotStreaming')
        return

    def run(self):
        """
        Gets and iterates over ups queues, deciding the job requirements and sending these to Harvester
        via the command interface
        :return:
        """

        # get unified pilot streaming (ups) queues
        ups_queues = taskBuffer.ups_get_queues()
        self._logger.debug('UPS queues: {0}'.format(ups_queues))

        # get worker stats
        worker_stats = taskBuffer.ups_load_worker_stats()

        # get global share distribution
        # hs_distribution = proxyS.get_hs_distribution()
        # gs_tree = proxyS
        # print(proxyS.tree.pretty_print_hs_distribution(proxyS._DBProxy__hs_distribution

        for ups_queue in ups_queues:
            # get the worker and job stats for the queue
            try:
                tmp_worker_stats = worker_stats[ups_queue]
                self._logger.debug('worker_stats for queue {0}: {1}'.format(ups_queue, tmp_worker_stats))
                # tmp_job_stats = job_stats[ups_queue]
            except KeyError:
                # skip queue if no data available
                self._logger.debug('No worker stats for queue {0}'.format(ups_queue))
                continue

            new_workers_per_harvester = taskBuffer.ups_new_worker_distribution(ups_queue, tmp_worker_stats)
            self._logger.info('queue: {0}, results: {1}'.format(ups_queue, new_workers_per_harvester))

            # variables for the harvester command
            command = '{0}:{1}'.format('SET_N_WORKERS', ups_queue)
            status = 'new'
            ack_requested = False
            lock_interval = None
            com_interval = None

            for harvester_id in new_workers_per_harvester:
                params = new_workers_per_harvester[harvester_id]

                # TODO: figure out if a command lock call is necessary or how that works
                taskBuffer.commandToHarvester(harvester_id, command, ack_requested, status,
                                              lock_interval, com_interval, params)

        return
Example #3
import sys
import datetime
import commands
import threading

from config import panda_config

# initialize cx_Oracle using dummy connection
from taskbuffer.Initializer import initializer
initializer.init()

from dataservice.Merger import Merger
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('runMerger')

_logger.debug("================= start ==================")

# overall timeout value
overallTimeout = 60

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(
        minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput(
        'env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
    for line in out.split('\n'):
        items = line.split()
Example #4
import sys
import datetime
import commands
from config import panda_config

# initialize cx_Oracle using dummy connection
from taskbuffer.Initializer import initializer
initializer.init()

from dataservice.Merger import Merger
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger


# logger
_logger = PandaLogger().getLogger('runMerger')

_logger.debug("================= start ==================")

# overall timeout value
overallTimeout = 60

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
    for line in out.split('\n'):
        items = line.split()
        # owned process
        if items[0] not in ['sm', 'atlpan', 'root']:  # ['os.getlogin()']: doesn't work in cron
Example #5
    return tasks_to_retag


def retag_tasks(task_id_list):
    """
    change the share for the selected tasks
    """
    destination_gshare = 'Frontier'
    reassign_running = True
    _logger.debug('Reassigning tasks: {0}'.format(task_id_list))
    return_code, return_message = taskBuffer.reassignShare(
        task_id_list, destination_gshare, reassign_running)

    return return_code, return_message


if __name__ == "__main__":

    # 1. get tasks with frontier failures
    failure_count_by_task = get_frontier_failure_count_by_task()

    # 2. filter out tasks by predefined criteria
    tasks_filtered = filter_tasks(failure_count_by_task)

    # 3. retag the tasks
    if tasks_filtered:
        return_code, return_message = retag_tasks(tasks_filtered)
        _logger.debug('tasks {0} reassigned with: {1}; {2}'.format(
            tasks_filtered, return_code, return_message))
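
Neither get_frontier_failure_count_by_task nor filter_tasks is shown in this fragment. A hypothetical sketch of the filtering step, assuming the failure counts arrive as a dict keyed by task ID and using an illustrative threshold:

# Hypothetical sketch only; the real filter_tasks is not in this fragment.
# Assumes failure_count_by_task maps task ID -> failure count.
FAILURE_THRESHOLD = 100  # illustrative value, not from the original script

def filter_tasks(failure_count_by_task):
    # keep only tasks whose frontier failure count exceeds the threshold
    return [task_id for task_id, count in failure_count_by_task.items()
            if count > FAILURE_THRESHOLD]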
Example #6
            taskBuffer.insertNetworkMatrixData(data_combined)
            # Do some cleanup of old data
            taskBuffer.deleteOldNetworkData()
            return True
        else:
            return False

if __name__ == "__main__":

    # If no argument, call the basic configurator
    if len(sys.argv) == 1:
        t1 = time.time()
        configurator = Configurator()
        if not configurator.run():
            _logger.critical("Configurator loop FAILED")
        t2 = time.time()
        _logger.debug("Configurator run took {0}s".format(t2-t1))

    # If --network argument, call the network configurator
    elif len(sys.argv) == 2 and sys.argv[1].lower() == '--network':
        t1 = time.time()
        network_configurator = NetworkConfigurator()
        if not network_configurator.run():
            _logger.critical("Configurator loop FAILED")
        t2 = time.time()
        _logger.debug(" run took {0}s".format(t2-t1))

    else:
        _logger.error("Configurator being called with wrong arguments. Use either no arguments or --network")

Example #7
            seconds=overallTimeout - 180)
        # get process list
        scriptName = sys.argv[0]
        out = commands.getoutput(
            'env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
        for line in out.split('\n'):
            items = line.split()
            # owned process
            if items[0] not in ['sm', 'atlpan', 'pansrv', 'root']:  # ['os.getlogin()']: doesn't work in cron
                continue
            # look for python
            if re.search('python', line) is None:
                continue
            # PID
            pid = items[1]
            # start time
            timeM = re.search(r'(\S+\s+\d+ \d+:\d+:\d+ \d+)', line)
            startTime = datetime.datetime(
                *time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6])
            # kill old process
            if startTime < timeLimit:
                _logger.debug("old process : %s %s" % (pid, startTime))
                _logger.debug(line)
                commands.getoutput('kill -9 %s' % pid)
    except:
        errtype, errvalue = sys.exc_info()[:2]
        _logger.error("kill process : %s %s" % (errtype, errvalue))
    # main loop
    main()
Example #8
    options,args = optP.parse_args()
    try:
        # time limit
        timeLimit = datetime.datetime.utcnow() - datetime.timedelta(seconds=overallTimeout-180)
        # get process list
        scriptName = sys.argv[0]
        out = commands.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
        for line in out.split('\n'):
            items = line.split()
            # owned process
            if items[0] not in ['sm', 'atlpan', 'root']:  # ['os.getlogin()']: doesn't work in cron
                continue
            # look for python
            if re.search('python', line) is None:
                continue
            # PID
            pid = items[1]
            # start time
            timeM = re.search(r'(\S+\s+\d+ \d+:\d+:\d+ \d+)', line)
            startTime = datetime.datetime(*time.strptime(timeM.group(1),'%b %d %H:%M:%S %Y')[:6])
            # kill old process
            if startTime < timeLimit:
                _logger.debug("old process : %s %s" % (pid,startTime))
                _logger.debug(line)            
                commands.getoutput('kill -9 %s' % pid)
    except:
        errtype,errvalue = sys.exc_info()[:2]
        _logger.error("kill process : %s %s" % (errtype,errvalue))
    # main loop    
    main()
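
The commands module used above exists only in Python 2. A self-contained sketch of the same stale-process cleanup for Python 3, assuming the same ps output format:

# Python 3 sketch of the stale-process cleanup: subprocess.getoutput
# replaces commands.getoutput, os.kill replaces the shelled-out kill -9.
import datetime
import os
import re
import signal
import subprocess
import sys
import time

def kill_stale_processes(overall_timeout, owners=('sm', 'atlpan', 'root')):
    time_limit = datetime.datetime.utcnow() - datetime.timedelta(seconds=overall_timeout - 180)
    script_name = sys.argv[0]
    out = subprocess.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % script_name)
    for line in out.split('\n'):
        items = line.split()
        # only processes owned by the service accounts
        if len(items) < 2 or items[0] not in owners:
            continue
        # only python processes
        if re.search('python', line) is None:
            continue
        # parse the lstart column, e.g. 'Jan  1 00:00:00 2024'
        time_m = re.search(r'(\S+\s+\d+ \d+:\d+:\d+ \d+)', line)
        if time_m is None:
            continue
        start_time = datetime.datetime(*time.strptime(time_m.group(1), '%b %d %H:%M:%S %Y')[:6])
        # kill processes started before the time limit
        if start_time < time_limit:
            os.kill(int(items[1]), signal.SIGKILL)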
Example #9
from jobdispatcher.Watcher import Watcher
from brokerage.SiteMapper import SiteMapper
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('runRebro')

_logger.debug("===================== start =====================")

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
                continue
            if line.startswith("VmSize:"):
Example #10
            taskBuffer.insertNetworkMatrixData(data_combined)
            # Do some cleanup of old data
            taskBuffer.deleteOldNetworkData()
            return True
        else:
            return False

if __name__ == "__main__":

    # If no argument, call the basic configurator
    if len(sys.argv)==1:
        t1 = time.time()
        configurator = Configurator()
        if not configurator.run():
            _logger.critical("Configurator loop FAILED")
        t2 = time.time()
        _logger.debug("Configurator run took {0}s".format(t2-t1))

    # If --network argument, call the network configurator
    elif len(sys.argv) == 2 and sys.argv[1].lower() == '--network':
        t1 = time.time()
        network_configurator = NetworkConfigurator()
        if not network_configurator.run():
            _logger.critical("Configurator loop FAILED")
        t2 = time.time()
        _logger.debug(" run took {0}s".format(t2-t1))

    else:
        _logger.error("Configurator being called with wrong arguments. Use either no arguments or --network")

Example #11
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('runRebro')

_logger.debug("===================== start =====================")

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
                continue
            if line.startswith("VmSize:"):
Example #12
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('backupJobArch')

_logger.debug("===================== start =====================")

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
                continue
            if line.startswith("VmSize:"):
Example #13
import os
import re
import sys
import datetime
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from brokerage.SiteMapper import SiteMapper

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('prioryMassage')

_logger.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# get usage breakdown
usageBreakDownPerUser = {}
usageBreakDownPerSite = {}
workingGroupList = []
for table in ['ATLAS_PANDA.jobsActive4','ATLAS_PANDA.jobsArchived4']:
    varMap = {}
    varMap[':prodSourceLabel'] = 'user'
    if table == 'ATLAS_PANDA.jobsActive4':
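
The fragment breaks off while building varMap. A hypothetical continuation sketch of the bind-variable query pattern it is setting up (Example #19 shows the same pattern); the SQL string is illustrative, and the (status, rows) return shape is assumed from proxyS.querySQLS in Example #24:

# Hypothetical continuation; the SQL is illustrative, not the original.
for table in ['ATLAS_PANDA.jobsActive4', 'ATLAS_PANDA.jobsArchived4']:
    varMap = {':prodSourceLabel': 'user'}
    sql = ("SELECT computingSite,COUNT(*) FROM %s "
           "WHERE prodSourceLabel=:prodSourceLabel "
           "GROUP BY computingSite") % table
    # assumed to return a (status, rows) pair like proxyS.querySQLS
    ret, res = taskBuffer.querySQLS(sql, varMap)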
Example #14
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('backupJobArch')

_logger.debug("===================== start =====================")


# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name = ""
        vmSize = ""
        vmRSS = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
                continue
Example #15
import os
import re
import sys
from ftplib import FTP
from pandalogger.PandaLogger import PandaLogger

# supported architectures
targetArchs = ['Linux-slc5-gcc4.3.tar.gz','Linux-slc5_amd64-gcc4.3.tar.gz']

# destination dir
destDir = '/data/atlpan/srv/var/appdir'

# logger
_logger = PandaLogger().getLogger('copyROOT')

_logger.debug("===================== start =====================")

try:
    # login to root repository
    ftp = FTP('root.cern.ch')
    output = ftp.login()
    _logger.debug(output)
    output = ftp.cwd('root')
    _logger.debug(output)    
    # get list
    flist = ftp.nlst()
    # loop over all files
    for tmpFile in flist:
        # skip RC
        if re.search(r'-rc\d\.', tmpFile) is not None:
            continue
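
The loop is cut off after skipping release candidates. A sketch of the download step with the standard ftplib retrbinary call; filtering on the targetArchs suffixes is an assumption about where the loop is heading:

# Download-step sketch; reuses ftp, flist, targetArchs, destDir and
# _logger from the fragment above. The targetArchs filter is an assumption.
for tmpFile in flist:
    # skip release candidates, as above
    if re.search(r'-rc\d\.', tmpFile) is not None:
        continue
    # fetch only the supported architecture tarballs
    if not any(tmpFile.endswith(arch) for arch in targetArchs):
        continue
    localPath = os.path.join(destDir, tmpFile)
    with open(localPath, 'wb') as fh:
        ftp.retrbinary('RETR %s' % tmpFile, fh.write)
    _logger.debug('downloaded %s' % localPath)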
Example #16
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('deleteJobs')

_logger.debug("===================== start =====================")


# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name = ""
        vmSize = ""
        vmRSS = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
                continue
Example #17
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('deleteJobs')

_logger.debug("===================== start =====================")

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
                continue
            if line.startswith("VmSize:"):
Example #18
import sys
import glob
import time
import os.path
import commands
import datetime
import threading
from config import panda_config
from taskbuffer.TaskBuffer import taskBuffer
from brokerage import SiteMapper
from dataservice.EventPicker import EventPicker
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('evpPD2P')

_logger.debug("===================== start =====================")

# overall timeout value
overallTimeout = 300
# prefix of evp files
prefixEVP = 'evp.'
# file pattern of evp files
evpFilePatt = panda_config.cache_dir + '/' + prefixEVP + '*'

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(
        minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
Example #19
import sys
from config import panda_config

# initialize cx_Oracle using dummy connection
from taskbuffer.Initializer import initializer
initializer.init()

from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger


# logger
_logger = PandaLogger().getLogger('boostUser')
_logger.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)

user = sys.stdin.read()
user = user[:-1]

sql = "UPDATE atlas_panda.%s set currentPriority=:prio where prodUserName=:uname and prodSourceLabel IN (:label1,:label2) and currentPriority<:prio"
varMap = {}
varMap[':prio'] = 4000
varMap[':uname'] = user
varMap[':label1'] = 'user'
varMap[':label2'] = 'panda'
for table in ('jobsactive4','jobsdefined4'):
    _logger.debug((sql % table) + str(varMap))
    ret = taskBuffer.querySQLS(sql % table, varMap)
    _logger.debug('ret -> %s' % str(ret))
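
The script reads the user name from stdin. A usage sketch for driving it from another Python process, assuming it is saved as boostUser.py (the file name is an assumption):

# Usage sketch; 'boostUser.py' is an assumed file name for this script.
import subprocess

subprocess.run(['python', 'boostUser.py'], input='some.user\n', text=True, check=True)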
Example #20
from dataservice import DataServiceUtils
from dataservice.Closer import Closer
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM
from dataservice.DDM import ddm  # needed for ddm.useDirectDQ2() below

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('datasetManager')

_logger.debug("===================== start =====================")

# use native DQ2
ddm.useDirectDQ2()

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
Example #21
# instantiate DB proxies
proxyS = DBProxy()
proxyS.connect(panda_config.dbhost, panda_config.dbpasswd, panda_config.dbuser,
               panda_config.dbname)

# time limit for dataset closing
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=7)

# close datasets
while True:
    sql = "SELECT vuid,name,modificationdate FROM Datasets " + \
          "WHERE type='output' AND (status='running' OR status='created' OR status='defined') " + \
          "AND modificationdate<'%s' AND name REGEXP '_sub[[:digit:]]+$'"
    ret, res = proxyS.querySQLS(sql % timeLimit.strftime('%Y-%m-%d %H:%M:%S'))
    _logger.debug("# of dataset : %s" % len(res))
    if len(res) == 0:
        break
    for (vuid, name, modDate) in res:
        _logger.debug("start %s %s" % (modDate, name))
        retF, resF = proxyS.querySQLS(
            "SELECT lfn FROM filesTable4 WHERE destinationDBlock='%s'" % name)
        if retF is None or retF < 0 or retF != len(resF):
            _logger.error("SQL error")
        else:
            # no files in filesTable
            if len(resF) == 0:
                _logger.debug("freeze %s " % name)
                status, out = ddm.dq2.main(['freezeDataset', name])
                if status != 0 or (out.find('Error') != -1 and out.find('DQ2 unknown dataset exception') == -1 \
                                   and out.find('DQ2 security exception') == -1):
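
The query above interpolates the timestamp and status literals straight into the SQL string. A sketch of the same select in the bind-variable style of Example #19, assuming proxyS.querySQLS accepts a varMap argument the way taskBuffer.querySQLS does:

# Bind-variable sketch; assumes proxyS.querySQLS takes a varMap argument
# like taskBuffer.querySQLS in Example #19.
sql = ("SELECT vuid,name,modificationdate FROM Datasets "
       "WHERE type=:type AND status IN (:st1,:st2,:st3) "
       "AND modificationdate<:modDate AND name REGEXP '_sub[[:digit:]]+$'")
varMap = {':type': 'output',
          ':st1': 'running', ':st2': 'created', ':st3': 'defined',
          ':modDate': timeLimit.strftime('%Y-%m-%d %H:%M:%S')}
ret, res = proxyS.querySQLS(sql, varMap)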
Example #22
from dataservice import DataServiceUtils
from dataservice.Closer import Closer
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM
from dataservice.DDM import ddm  # needed for ddm.useDirectDQ2() below

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('datasetManager')

_logger.debug("===================== start =====================")

# use native DQ2
ddm.useDirectDQ2()

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
Example #23
import sys
from config import panda_config

# initialize cx_Oracle using dummy connection
from taskbuffer.Initializer import initializer

initializer.init()

from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('boostUser')
_logger.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

user = sys.stdin.read()
user = user[:-1]

sql = "UPDATE atlas_panda.%s set currentPriority=:prio where prodUserName=:uname and prodSourceLabel IN (:label1,:label2) and currentPriority<:prio"
varMap = {}
varMap[':prio'] = 4000
varMap[':uname'] = user
varMap[':label1'] = 'user'
varMap[':label2'] = 'panda'
for table in ('jobsactive4', 'jobsdefined4'):
    _logger.debug((sql % table) + str(varMap))
    ret = taskBuffer.querySQLS(sql % table, varMap)
    _logger.debug('ret -> %s' % str(ret))
Example #24
passwd = panda_config.dbpasswd

# instantiate DB proxies
proxyS = DBProxy()
proxyS.connect(panda_config.dbhost,panda_config.dbpasswd,panda_config.dbuser,panda_config.dbname)

# time limit for dataset closing
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=7)

# close datasets
while True:
    sql = "SELECT vuid,name,modificationdate FROM Datasets " + \
          "WHERE type='output' AND (status='running' OR status='created' OR status='defined') " + \
          "AND modificationdate<'%s' AND name REGEXP '_sub[[:digit:]]+$'"
    ret,res = proxyS.querySQLS(sql % timeLimit.strftime('%Y-%m-%d %H:%M:%S'))
    _logger.debug("# of dataset : %s" % len(res))
    if len(res) == 0:
        break
    for (vuid,name,modDate) in res:
        _logger.debug("start %s %s" % (modDate,name))
        retF,resF = proxyS.querySQLS("SELECT lfn FROM filesTable4 WHERE destinationDBlock='%s'" % name)
        if retF<0 or retF == None or retF!=len(resF):
            _logger.error("SQL error")
        else:
            # no files in filesTable
            if len(resF) == 0:
                _logger.debug("freeze %s " % name)
                status,out = ddm.dq2.main(['freezeDataset',name])
                if status != 0 or (out.find('Error') != -1 and out.find('DQ2 unknown dataset exception') == -1 \
                                   and out.find('DQ2 security exception') == -1):
                    _logger.error(out)
Example #25
import sys
import glob
import time
import os.path
import commands
import datetime
import threading
from config import panda_config
from taskbuffer.TaskBuffer import taskBuffer
from brokerage import SiteMapper
from dataservice.EventPicker import EventPicker
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('evpPD2P')

_logger.debug("===================== start =====================")

# overall timeout value
overallTimeout = 300
# prefix of evp files
prefixEVP = 'evp.'
# file pattern of evp files
evpFilePatt = panda_config.cache_dir + '/' + prefixEVP + '*'

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
Example #26
import sys
import datetime
import commands
import threading
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from dataservice.AdderGen import AdderGen
from brokerage.SiteMapper import SiteMapper
from pandautils import PandaUtils

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('add')

_logger.debug("===================== start =====================")

# overall timeout value
overallTimeout = 20

# current minute
currentMinute = datetime.datetime.utcnow().minute

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
    for line in out.split('\n'):
Example #27
class PilotStreaming:
    def __init__(self):
        self._logger = PandaLogger().getLogger('PilotStreaming')
        return

    def run(self):
        """
        Gets and iterates over ups queues, deciding the job requirements and sending these to Harvester
        via the command interface
        :return:
        """

        # timing
        time_start = time.time()
        self._logger.debug('Start.')

        # get unified pilot streaming (ups) queues
        ups_queues = taskBuffer.ups_get_queues()
        self._logger.debug('UPS queues: {0}'.format(ups_queues))

        # get worker stats
        worker_stats = taskBuffer.ups_load_worker_stats()

        for ups_queue in ups_queues:
            # get the worker and job stats for the queue
            try:
                tmp_worker_stats = worker_stats[ups_queue]
                self._logger.debug('worker_stats for queue {0}: {1}'.format(
                    ups_queue, tmp_worker_stats))
                # tmp_job_stats = job_stats[ups_queue]
            except KeyError:
                # skip queue if no data available
                self._logger.debug(
                    'No worker stats for queue {0}'.format(ups_queue))
                continue

            new_workers_per_harvester = taskBuffer.ups_new_worker_distribution(
                ups_queue, tmp_worker_stats)
            self._logger.info('queue: {0}, results: {1}'.format(
                ups_queue, new_workers_per_harvester))

            # variables for the harvester command
            command = '{0}:{1}'.format('SET_N_WORKERS', ups_queue)
            status = 'new'
            ack_requested = False
            lock_interval = None
            com_interval = None

            for harvester_id in new_workers_per_harvester:
                params = new_workers_per_harvester[harvester_id]
                taskBuffer.commandToHarvester(harvester_id, command,
                                              ack_requested, status,
                                              lock_interval, com_interval,
                                              params)

        # timing
        time_stop = time.time()
        self._logger.debug(
            'Done. Pilot streaming took: {0} s'.format(time_stop - time_start))

        return
Example #28
import os
import re
import sys
from ftplib import FTP
from pandalogger.PandaLogger import PandaLogger

# supported architectures
targetArchs = ['Linux-slc5-gcc4.3.tar.gz', 'Linux-slc5_amd64-gcc4.3.tar.gz']

# destination dir
destDir = '/var/appdir'

# logger
_logger = PandaLogger().getLogger('copyROOT')

_logger.debug("===================== start =====================")

try:
    # login to root repository
    ftp = FTP('root.cern.ch')
    output = ftp.login()
    _logger.debug(output)
    output = ftp.cwd('root')
    _logger.debug(output)
    # get list
    flist = ftp.nlst()
    # loop over all files
    for tmpFile in flist:
        # skip RC
        if re.search(r'-rc\d\.', tmpFile) is not None:
            continue
Example #29
    tasks_to_retag = tasks_filtered_pslabel

    return tasks_to_retag


def retag_tasks(task_id_list):
    """
    change the share for the selected tasks
    """
    destination_gshare = 'Frontier'
    reassign_running = True
    _logger.debug('Reassigning tasks: {0}'.format(task_id_list))
    return_code, return_message = taskBuffer.reassignShare(task_id_list, destination_gshare, reassign_running)

    return return_code, return_message


if __name__ == "__main__":

    # 1. get tasks with frontier failures
    failure_count_by_task = get_frontier_failure_count_by_task()

    # 2. filter out tasks by predefined criteria
    tasks_filtered = filter_tasks(failure_count_by_task)

    # 3. retag the tasks
    if tasks_filtered:
        return_code, return_message = retag_tasks(tasks_filtered)
        _logger.debug('tasks {0} reassigned with: {1}; {2}'.format(tasks_filtered, return_code, return_message))
Example #30
import os
import re
import sys
import datetime
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from brokerage.SiteMapper import SiteMapper

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('shareMgr')

_logger.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# number of jobs to be activated per queue
nJobsPerQueue = 50

# priority threshold
prioCutoff = 950

# get high prio jobs without throttling
sql  = "SELECT distinct computingSite FROM ATLAS_PANDA.jobsActive4 "