Example #1
    def trigger_stage_out(self, jobspec):
        """Trigger the stage-out procedure for the job.
        Create a dummy output file to force harvester to wait until the aCT
        job is done.

        :param jobspec: job specifications
        :type jobspec: JobSpec
        :return: A tuple of return code (True: success, False: fatal failure, None: temporary failure)
                 and error dialog
        :rtype: (bool, string)
        """
        fileSpec = FileSpec()
        fileSpec.PandaID = jobspec.PandaID
        fileSpec.taskID = jobspec.taskID
        fileSpec.lfn = 'dummy.{0}'.format(jobspec.PandaID)
        fileSpec.scope = 'dummy'
        fileSpec.fileType = 'output'
        jobspec.add_in_file(fileSpec)

        return True, ''
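
A minimal way to exercise this method outside a full harvester instance is sketched below; the DummyActStager wrapper class and the IDs are made up for illustration, and only the pandaharvester core classes already used in these examples are assumed to be importable.

from pandaharvester.harvestercore.job_spec import JobSpec
from pandaharvester.harvestercore.file_spec import FileSpec


class DummyActStager(object):
    # hypothetical wrapper class, only to exercise trigger_stage_out
    def trigger_stage_out(self, jobspec):
        fileSpec = FileSpec()
        fileSpec.PandaID = jobspec.PandaID
        fileSpec.taskID = jobspec.taskID
        fileSpec.lfn = 'dummy.{0}'.format(jobspec.PandaID)
        fileSpec.scope = 'dummy'
        fileSpec.fileType = 'output'
        jobspec.add_in_file(fileSpec)
        return True, ''


jobSpec = JobSpec()
jobSpec.PandaID = 1234567            # dummy IDs, for illustration only
jobSpec.taskID = 89
tmpStat, errDiag = DummyActStager().trigger_stage_out(jobSpec)
print('return code: {0}, dialog: "{1}"'.format(tmpStat, errDiag))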
Example #2
 def run(self):
     while True:
         mainLog = self.make_logger(_logger, 'id={0}'.format(self.get_pid()), method_name='run')
         mainLog.debug('getting number of jobs to be fetched')
         # get number of jobs to be fetched
         nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(harvester_config.jobfetcher.nQueues,
                                                            harvester_config.jobfetcher.lookupTime)
         mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
         # loop over all queues
         for queueName, nJobs in iteritems(nJobsPerQueue):
             # check queue
             if not self.queueConfigMapper.has_queue(queueName):
                 continue
             tmpLog = self.make_logger(_logger, 'queueName={0}'.format(queueName),
                                       method_name='run')
             # get queue
             queueConfig = self.queueConfigMapper.get_queue(queueName)
             # upper limit
             if nJobs > harvester_config.jobfetcher.maxJobs:
                 nJobs = harvester_config.jobfetcher.maxJobs
             # get jobs
             tmpLog.debug('getting {0} jobs'.format(nJobs))
             sw = core_utils.get_stopwatch()
             siteName = queueConfig.siteName
             jobs, errStr = self.communicator.get_jobs(siteName, self.nodeName,
                                                       queueConfig.get_source_label(),
                                                       self.nodeName, nJobs,
                                                       queueConfig.getJobCriteria)
             tmpLog.info('got {0} jobs with {1} {2}'.format(len(jobs), errStr, sw.get_elapsed_time()))
             # convert to JobSpec
             if len(jobs) > 0:
                 jobSpecs = []
                 fileStatMap = dict()
                 sw_startconvert = core_utils.get_stopwatch()
                 for job in jobs:
                     timeNow = datetime.datetime.utcnow()
                     jobSpec = JobSpec()
                     jobSpec.convert_job_json(job)
                     jobSpec.computingSite = queueName
                     jobSpec.status = 'starting'
                     jobSpec.subStatus = 'fetched'
                     jobSpec.creationTime = timeNow
                     jobSpec.stateChangeTime = timeNow
                     jobSpec.configID = queueConfig.configID
                     jobSpec.set_one_attribute('schedulerID',
                                               'harvester-{0}'.format(harvester_config.master.harvester_id))
                     if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                         jobSpec.zipPerMB = queueConfig.zipPerMB
                     for tmpLFN, fileAttrs in iteritems(jobSpec.get_input_file_attributes()):
                         # check file status
                         if tmpLFN not in fileStatMap:
                             fileStatMap[tmpLFN] = self.dbProxy.get_file_status(tmpLFN, 'input',
                                                                                queueConfig.ddmEndpointIn,
                                                                                'starting')
                         # make file spec
                         fileSpec = FileSpec()
                         fileSpec.PandaID = jobSpec.PandaID
                         fileSpec.taskID = jobSpec.taskID
                         fileSpec.lfn = tmpLFN
                         fileSpec.endpoint = queueConfig.ddmEndpointIn
                         fileSpec.scope = fileAttrs['scope']
                         # set preparing to skip stage-in if the file is (being) taken care of by another job
                         if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                 or 'to_prepare' in fileStatMap[tmpLFN]:
                             fileSpec.status = 'preparing'
                         else:
                             fileSpec.status = 'to_prepare'
                         if fileSpec.status not in fileStatMap[tmpLFN]:
                             fileStatMap[tmpLFN][fileSpec.status] = 0
                         fileStatMap[tmpLFN][fileSpec.status] += 1
                         fileSpec.fileType = 'input'
                         jobSpec.add_in_file(fileSpec)
                     jobSpec.trigger_propagation()
                     jobSpecs.append(jobSpec)
                 # insert to DB
                 tmpLog.debug("Converting of {0} jobs {1}".format(len(jobs),sw_startconvert.get_elapsed_time()))
                 sw_insertdb =core_utils.get_stopwatch()
                 self.dbProxy.insert_jobs(jobSpecs)
                 tmpLog.debug('Insert of {0} jobs {1}'.format(len(jobSpecs), sw_insertdb.get_elapsed_time()))
         mainLog.debug('done')
         # check if being terminated
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             mainLog.debug('terminated')
             return
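
The only non-trivial logic in the loop above is the fileStatMap bookkeeping: an LFN already known as 'ready', 'preparing' or 'to_prepare' for another job is marked 'preparing' so its stage-in is skipped, otherwise it becomes 'to_prepare', and the per-status counter is bumped. A standalone sketch of that decision (plain Python, not harvester API) is:

def decide_file_status(stat_counts):
    """Pick 'preparing' if another job already handles the LFN, else
    'to_prepare', and bump the counter, mirroring the fileStatMap logic."""
    if ('ready' in stat_counts or 'preparing' in stat_counts
            or 'to_prepare' in stat_counts):
        new_status = 'preparing'
    else:
        new_status = 'to_prepare'
    stat_counts.setdefault(new_status, 0)
    stat_counts[new_status] += 1
    return new_status


counts = {'ready': 2}                 # e.g. two jobs already have the file ready
print(decide_file_status(counts))     # -> 'preparing'
print(decide_file_status({}))         # -> 'to_prepare'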
Example #3
queueConfig = queueConfigMapper.get_queue(queueName)
print(queueConfig.stager)
print(queueConfig.stager['Globus_srcPath'])
print(queueConfig.stager['srcEndpoint'])
print(queueConfig.stager['Globus_dstPath'])
print(queueConfig.stager['dstEndpoint'])
print(queueConfig.stager['zipDir'])

print "Initial queueConfig.stager = ", queueConfig.stager
queueConfig.stager['module'] = 'pandaharvester.harvesterstager.go_stager'
queueConfig.stager['name'] = 'GlobusStager'
print "Modified queueConfig.stager = ", queueConfig.stager

scope = 'panda'

fileSpec = FileSpec()
fileSpec.fileType = 'es_output'
fileSpec.lfn = 'panda.sgotest.' + uuid.uuid4().hex + '.gz'
fileSpec.fileAttributes = {}
assFileSpec = FileSpec()
assFileSpec.lfn = 'panda.sgotest.' + uuid.uuid4().hex
assFileSpec.fileType = 'es_output'
assFileSpec.fsize = random.randint(10, 100)
# create source file
hash = hashlib.md5()
hash.update(('%s:%s' % (scope, fileSpec.lfn)).encode('utf-8'))
hash_hex = hash.hexdigest()
correctedscope = "/".join(scope.split('.'))
assFileSpec.path = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(
    endPoint=queueConfig.stager['Globus_srcPath'],
    scope=correctedscope,
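    # NOTE: the listing is cut off here; the remaining keyword arguments below
    # are an assumed completion using the usual two-character md5 prefix
    # convention for the destination path, not the original source.
    hash1=hash_hex[0:2],   # assumed: first two hex characters of the md5
    hash2=hash_hex[2:4],   # assumed: next two hex characters
    lfn=assFileSpec.lfn)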
Example #4
file_prefix = 'panda.sgotest.'


def exit_func():
    for f in os.listdir('.'):
        if f.startswith(file_prefix):
            os.remove(f)


atexit.register(exit_func)

queueName = sys.argv[1]
queueConfigMapper = QueueConfigMapper()
queueConfig = queueConfigMapper.get_queue(queueName)

fileSpec = FileSpec()
fileSpec.fileType = 'output'
fileSpec.lfn = file_prefix + uuid.uuid4().hex + '.gz'
fileSpec.fileAttributes = {'guid': str(uuid.uuid4())}
fileSpec.checksum = '0d439274'
assFileSpec = FileSpec()
assFileSpec.lfn = file_prefix + uuid.uuid4().hex
assFileSpec.fileType = 'es_output'
assFileSpec.fsize = random.randint(10, 100)
assFileSpec.path = os.getcwd() + '/' + assFileSpec.lfn
oFile = open(assFileSpec.lfn, 'w')
oFile.write(''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(assFileSpec.fsize)))
oFile.close()
fileSpec.add_associated_file(assFileSpec)
jobSpec = JobSpec()
jobSpec.jobParams = {'outFiles': fileSpec.lfn + ',log',
                     'scopeLog': 'panda',
                     'logFile': 'log',
                     }
jobSpec.computingSite = queueName
jobSpec.PandaID = job_id
jobSpec.modificationTime = datetime.datetime.now()
realDataset = 'panda.sgotest.' + uuid.uuid4().hex
ddmEndPointOut = 'BNL-OSG2_DATADISK'
outFiles_scope_str = ''
outFiles_str = ''
realDatasets_str = ''
ddmEndPointOut_str = ''
# create up to 5 files for output
for index in range(random.randint(1, 5)):
    fileSpec = FileSpec()
    assFileSpec = FileSpec()
    fileSpec.fileType = 'es_output'
    assFileSpec.lfn = 'panda.sgotest.' + uuid.uuid4().hex
    fileSpec.lfn = assFileSpec.lfn + '.gz'
    fileSpec.scope = 'panda'
    outFiles_scope_str += 'panda,' 
    outFiles_str += fileSpec.lfn + ','
    realDatasets_str += realDataset + ","
    ddmEndPointOut_str += ddmEndPointOut + ","
    assFileSpec.fileType = 'es_output'
    assFileSpec.fsize = random.randint(10, 100)
    # create source file
    hash = hashlib.md5()
    hash.update(('%s:%s' % (scope, fileSpec.lfn)).encode('utf-8'))
    hash_hex = hash.hexdigest()
Example #6
 def run(self):
     while True:
         mainLog = self.make_logger(_logger,
                                    'id={0}'.format(self.get_pid()),
                                    method_name='run')
         mainLog.debug('getting number of jobs to be fetched')
         # get number of jobs to be fetched
         nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(
             harvester_config.jobfetcher.nQueues,
             harvester_config.jobfetcher.lookupTime)
         mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
         # loop over all queues
         for queueName, nJobs in iteritems(nJobsPerQueue):
             # check queue
             if not self.queueConfigMapper.has_queue(queueName):
                 continue
             tmpLog = self.make_logger(_logger,
                                       'queueName={0}'.format(queueName),
                                       method_name='run')
             # get queue
             queueConfig = self.queueConfigMapper.get_queue(queueName)
             # upper limit
             if nJobs > harvester_config.jobfetcher.maxJobs:
                 nJobs = harvester_config.jobfetcher.maxJobs
             # get jobs
             default_prodSourceLabel = queueConfig.get_source_label()
             pdpm = getattr(queueConfig,
                            'prodSourceLabelRandomWeightsPermille', {})
             choice_list = core_utils.make_choice_list(
                 pdpm=pdpm, default=default_prodSourceLabel)
             prodSourceLabel = random.choice(choice_list)
             tmpLog.debug('getting {0} jobs for prodSourceLabel {1}'.format(
                 nJobs, prodSourceLabel))
             sw = core_utils.get_stopwatch()
             siteName = queueConfig.siteName
             jobs, errStr = self.communicator.get_jobs(
                 siteName, self.nodeName, prodSourceLabel, self.nodeName,
                 nJobs, queueConfig.getJobCriteria)
             tmpLog.info('got {0} jobs with {1} {2}'.format(
                 len(jobs), errStr, sw.get_elapsed_time()))
             # convert to JobSpec
             if len(jobs) > 0:
                 # get extractor plugin
                 if hasattr(queueConfig, 'extractor'):
                     extractorCore = self.pluginFactory.get_plugin(
                         queueConfig.extractor)
                 else:
                     extractorCore = None
                 jobSpecs = []
                 fileStatMap = dict()
                 sw_startconvert = core_utils.get_stopwatch()
                 for job in jobs:
                     timeNow = datetime.datetime.utcnow()
                     jobSpec = JobSpec()
                     jobSpec.convert_job_json(job)
                     jobSpec.computingSite = queueName
                     jobSpec.status = 'starting'
                     jobSpec.subStatus = 'fetched'
                     jobSpec.creationTime = timeNow
                     jobSpec.stateChangeTime = timeNow
                     jobSpec.configID = queueConfig.configID
                     jobSpec.set_one_attribute(
                         'schedulerID', 'harvester-{0}'.format(
                             harvester_config.master.harvester_id))
                     if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                         jobSpec.zipPerMB = queueConfig.zipPerMB
                     fileGroupDictList = [
                         jobSpec.get_input_file_attributes()
                     ]
                     if extractorCore is not None:
                         fileGroupDictList.append(
                             extractorCore.get_aux_inputs(jobSpec))
                     for fileGroupDict in fileGroupDictList:
                         for tmpLFN, fileAttrs in iteritems(fileGroupDict):
                             # check file status
                             if tmpLFN not in fileStatMap:
                                 fileStatMap[
                                     tmpLFN] = self.dbProxy.get_file_status(
                                         tmpLFN, 'input',
                                         queueConfig.ddmEndpointIn,
                                         'starting')
                             # make file spec
                             fileSpec = FileSpec()
                             fileSpec.PandaID = jobSpec.PandaID
                             fileSpec.taskID = jobSpec.taskID
                             fileSpec.lfn = tmpLFN
                             fileSpec.endpoint = queueConfig.ddmEndpointIn
                             fileSpec.scope = fileAttrs['scope']
                             # set preparing to skip stage-in if the file is (being) taken care of by another job
                             if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                     or 'to_prepare' in fileStatMap[tmpLFN]:
                                 fileSpec.status = 'preparing'
                             else:
                                 fileSpec.status = 'to_prepare'
                             if fileSpec.status not in fileStatMap[tmpLFN]:
                                 fileStatMap[tmpLFN][fileSpec.status] = 0
                             fileStatMap[tmpLFN][fileSpec.status] += 1
                             if 'INTERNAL_FileType' in fileAttrs:
                                 fileSpec.fileType = fileAttrs[
                                     'INTERNAL_FileType']
                                 jobSpec.auxInput = JobSpec.AUX_hasAuxInput
                             else:
                                 fileSpec.fileType = 'input'
                             if 'INTERNAL_URL' in fileAttrs:
                                 fileSpec.url = fileAttrs['INTERNAL_URL']
                             jobSpec.add_in_file(fileSpec)
                     jobSpec.trigger_propagation()
                     jobSpecs.append(jobSpec)
                 # insert to DB
                 tmpLog.debug("Converting of {0} jobs {1}".format(
                     len(jobs), sw_startconvert.get_elapsed_time()))
                 sw_insertdb = core_utils.get_stopwatch()
                 self.dbProxy.insert_jobs(jobSpecs)
                 tmpLog.debug('Insert of {0} jobs {1}'.format(
                     len(jobSpecs), sw_insertdb.get_elapsed_time()))
         mainLog.debug('done')
         # check if being terminated
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             mainLog.debug('terminated')
             return
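
Example #6 differs from Example #2 mainly in how prodSourceLabel is picked: core_utils.make_choice_list expands the permille weights into a choice list that random.choice then samples. Below is a plain-Python sketch of that idea; it is an illustration of the weighting scheme, not the actual core_utils implementation, and the weight values are hypothetical.

import random


def make_choice_list(pdpm, default, total=1000):
    # expand permille weights into a flat list; the default label fills the rest
    choices = []
    for label, permille in pdpm.items():
        choices += [label] * permille
    choices += [default] * max(total - len(choices), 0)
    return choices


weights = {'rc_test2': 100}                        # hypothetical: 10% of cycles
labels = make_choice_list(weights, default='managed')
print(random.choice(labels))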
Example #7
jobSpec.modificationTime = datetime.datetime.now()
realDataset = 'panda.sgotest.' + uuid.uuid4().hex
ddmEndPointIn = 'BNL-OSG2_DATADISK'
inFiles_scope_str = ''
inFiles_str = ''
realDatasets_str = ''
realDatasetsIn_str = ''
ddmEndPointIn_str = ''
GUID_str = ''
fsize_str = ''
checksum_str = ''
scope_in_str = ''

# create up to 5 files for input
for index in range(random.randint(1, 5)):
    fileSpec = FileSpec()
    assFileSpec = FileSpec()
    fileSpec.fileType = 'input'
    assFileSpec.lfn = 'panda.sgotest.' + uuid.uuid4().hex
    fileSpec.lfn = assFileSpec.lfn
    fileSpec.scope = 'panda'
    inFiles_scope_str += 'panda,'
    inFiles_str += fileSpec.lfn + ','
    realDatasets_str += realDataset + ","
    realDatasetsIn_str += realDataset + ","
    ddmEndPointIn_str += ddmEndPointIn + ","
    # some dummy inputs
    GUID_str += 'd82e8e5e301b77489fd4da04bcdd6565,'
    fsize_str += '3084569129,'
    checksum_str += 'ad:9f60d29f,'
    scope_in_str += 'panda,'
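
# Hedged sketch (not in the original listing): the comma-separated strings built
# above would typically feed jobSpec.jobParams; the key names below are assumed
# to mirror the *_str variable names and are not confirmed by this listing.
jobSpec.jobParams = {'inFiles': inFiles_str,
                     'scopeIn': scope_in_str,
                     'fsize': fsize_str,
                     'GUID': GUID_str,
                     'checksum': checksum_str,
                     'realDatasetsIn': realDatasetsIn_str,
                     'ddmEndPointIn': ddmEndPointIn_str,
                     }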
Example #8
file_prefix = 'panda.sgotest.'


def exit_func():
    for f in os.listdir('.'):
        if f.startswith(file_prefix):
            os.remove(f)


atexit.register(exit_func)

queueName = sys.argv[1]
queueConfigMapper = QueueConfigMapper()
queueConfig = queueConfigMapper.get_queue(queueName)

fileSpec = FileSpec()
fileSpec.fileType = 'output'
fileSpec.lfn = file_prefix + uuid.uuid4().hex + '.gz'
fileSpec.fileAttributes = {'guid': str(uuid.uuid4())}
fileSpec.checksum = '0d439274'
assFileSpec = FileSpec()
assFileSpec.lfn = file_prefix + uuid.uuid4().hex
assFileSpec.fileType = 'es_output'
assFileSpec.fsize = random.randint(10, 100)
assFileSpec.path = os.getcwd() + '/' + assFileSpec.lfn
oFile = open(assFileSpec.lfn, 'w')
oFile.write(''.join(
    random.choice(string.ascii_uppercase + string.digits)
    for _ in range(assFileSpec.fsize)))
oFile.close()
fileSpec.add_associated_file(assFileSpec)
Example #9
 def run(self):
     while True:
         mainLog = self.make_logger(_logger,
                                    'id={0}'.format(self.ident),
                                    method_name='run')
         mainLog.debug('getting number of jobs to be fetched')
         # get number of jobs to be fetched
         nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(
             harvester_config.jobfetcher.nQueues,
             harvester_config.jobfetcher.lookupTime)
         mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
         # loop over all queues
         for queueName, nJobs in iteritems(nJobsPerQueue):
             # check queue
             if not self.queueConfigMapper.has_queue(queueName):
                 continue
             tmpLog = self.make_logger(_logger,
                                       'queueName={0}'.format(queueName),
                                       method_name='run')
             # get queue
             queueConfig = self.queueConfigMapper.get_queue(queueName)
             # upper limit
             if nJobs > harvester_config.jobfetcher.maxJobs:
                 nJobs = harvester_config.jobfetcher.maxJobs
             # get jobs
             tmpLog.debug('getting {0} jobs'.format(nJobs))
             siteName = queueConfig.siteName
             jobs, errStr = self.communicator.get_jobs(
                 siteName, self.nodeName, queueConfig.get_source_label(),
                 self.nodeName, nJobs, queueConfig.getJobCriteria)
             tmpLog.info('got {0} jobs with {1}'.format(len(jobs), errStr))
             # convert to JobSpec
             if len(jobs) > 0:
                 jobSpecs = []
                 fileStatMap = dict()
                 for job in jobs:
                     timeNow = datetime.datetime.utcnow()
                     jobSpec = JobSpec()
                     jobSpec.convert_job_json(job)
                     jobSpec.computingSite = queueName
                     jobSpec.status = 'starting'
                     jobSpec.subStatus = 'fetched'
                     jobSpec.creationTime = timeNow
                     jobSpec.stateChangeTime = timeNow
                     jobSpec.configID = queueConfig.configID
                     jobSpec.set_one_attribute(
                         'schedulerID', 'harvester-{0}'.format(
                             harvester_config.master.harvester_id))
                     if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                         jobSpec.zipPerMB = queueConfig.zipPerMB
                     for tmpLFN, fileAttrs in iteritems(
                             jobSpec.get_input_file_attributes()):
                         # check file status
                         if tmpLFN not in fileStatMap:
                             fileStatMap[
                                 tmpLFN] = self.dbProxy.get_file_status(
                                     tmpLFN, 'input',
                                     queueConfig.ddmEndpointIn, 'starting')
                         # make file spec
                         fileSpec = FileSpec()
                         fileSpec.PandaID = jobSpec.PandaID
                         fileSpec.taskID = jobSpec.taskID
                         fileSpec.lfn = tmpLFN
                         fileSpec.endpoint = queueConfig.ddmEndpointIn
                         fileSpec.scope = fileAttrs['scope']
                         # set preparing to skip stage-in if the file is (being) taken care of by another job
                         if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                 or 'to_prepare' in fileStatMap[tmpLFN]:
                             fileSpec.status = 'preparing'
                         else:
                             fileSpec.status = 'to_prepare'
                         if fileSpec.status not in fileStatMap[tmpLFN]:
                             fileStatMap[tmpLFN][fileSpec.status] = 0
                         fileStatMap[tmpLFN][fileSpec.status] += 1
                         fileSpec.fileType = 'input'
                         jobSpec.add_in_file(fileSpec)
                     jobSpec.trigger_propagation()
                     jobSpecs.append(jobSpec)
                 # insert to DB
                 self.dbProxy.insert_jobs(jobSpecs)
         mainLog.debug('done')
         # check if being terminated
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             mainLog.debug('terminated')
             return
import sys
import uuid
from pandaharvester.harvestercore.job_spec import JobSpec
from pandaharvester.harvestercore.file_spec import FileSpec
from pandaharvester.harvestercore.event_spec import EventSpec

from pandaharvester.harvestercore.communicator_pool import CommunicatorPool

rID = sys.argv[1]
taskid = rID.split('-')[0]
pandaid = int(rID.split('-')[1])

job = JobSpec()
job.PandaID = pandaid
event = EventSpec()
file = FileSpec()
file.status = 'finished'
file.objstoreID = 9575
file.pathConvention = 1000
file.lfn = str(uuid.uuid4().hex) + '.zip'
file.fsize = 555
file.chksum = '0d2a9dc9'
event.eventRangeID = rID
event.eventStatus = 'finished'
job.zipEventMap = {1: {'events':[event],
                       'zip':file}}


a = CommunicatorPool()
a.update_jobs([job])
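
zipEventMap, as used above, keys each zip file by an arbitrary index and lists the event ranges it contains. Attaching several finished event ranges to the same zip would look like the sketch below; the second event range ID is made up for illustration.

# hedged sketch: several event ranges sharing one zip entry
event2 = EventSpec()
event2.eventRangeID = rID + '-2'      # made-up range ID, for illustration only
event2.eventStatus = 'finished'
job.zipEventMap = {1: {'events': [event, event2],
                       'zip': file}}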