Example #1
    def events_to_update(self, workspec):
        '''Report events processed for harvester to update'''

        # get logger
        arclog = arc_utils.ARCLogger(baselogger, workspec.workerID)
        tmpLog = arclog.log

        job = workspec.workAttributes['arcjob']
        arcid = job['JobID']
        # Set certificate to use for interacting with ARC CE
        usercfg = arc.UserConfig(self.cred_type)
        if not self._setup_proxy(usercfg, workspec, arcid, tmpLog):
            return False

        # Check for jobid/jsonEventsUpdateFileName on CE, rename to .read
        retDict = dict()
        for pandaID in workspec.pandaid_list:

            # first look for json.read which is not yet acknowledged
            accessPoint = self.get_access_point(workspec, pandaID)
            localJsonFilePath = os.path.join(accessPoint,
                                             jsonEventsUpdateFileName)
            remoteJsonFilePathRead = '%s/%s%s' % (
                arcid, jsonEventsUpdateFileName, suffixReadJson)
            tmpLog.debug('looking for event update file {0}'.format(
                remoteJsonFilePathRead))

            status = self._copy_file(remoteJsonFilePathRead, localJsonFilePath,
                                     usercfg, tmpLog)
            if not status:
                if status.GetErrno() != errno.ENOENT:
                    tmpLog.warning('Failed checking {0}: {1}'.format(
                        remoteJsonFilePathRead, str(status)))
                    continue

                # Look for new json
                remoteJsonFilePath = '%s/%s' % (arcid,
                                                jsonEventsUpdateFileName)
                status = self._copy_file(remoteJsonFilePath, localJsonFilePath,
                                         usercfg, tmpLog)
                if not status:
                    if status.GetErrno() != errno.ENOENT:
                        tmpLog.warning('Failed checking {0}: {1}'.format(
                            remoteJsonFilePath, str(status)))
                    else:
                        # not found
                        tmpLog.debug('not found')
                    continue

                # Rename to prevent from being overwritten
                # Gridftp does not support renaming so upload .read file and delete old one
                status = self._copy_file(localJsonFilePath,
                                         remoteJsonFilePathRead, usercfg,
                                         tmpLog)
                if not status:
                    tmpLog.warning('Failed copying {0} to {1}: {2}'.format(
                        localJsonFilePath, remoteJsonFilePathRead,
                        str(status)))
                # If rename fails, delete old file anyway
                status = self._delete_file(remoteJsonFilePath, usercfg, tmpLog)
                if not status:
                    tmpLog.warning('Failed deleting {0}: {1}'.format(
                        remoteJsonFilePath, str(status)))

            # load json
            nData = 0
            try:
                with open(localJsonFilePath) as jsonFile:
                    tmpOrigDict = json.load(jsonFile)
                    # change the key from str to int
                    for tmpPandaID, tmpDict in tmpOrigDict.iteritems():
                        tmpPandaID = long(tmpPandaID)
                        retDict[tmpPandaID] = tmpDict
                        nData += 1
            except Exception:
                tmpLog.error('failed to load json')
                raise
            # delete local file
            try:
                os.remove(localJsonFilePath)
            except Exception:
                pass
            tmpLog.debug('got {0} events for PandaID={1}'.format(
                nData, pandaID))
        return retDict
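
A note on the helpers used above: _copy_file and _delete_file return an arc.DataStatus, which evaluates false on failure and exposes the transfer errno via GetErrno(). A minimal sketch of such a copy helper, assuming the DataMover transfer API from the ARC SDK examples (the helper name and logging mirror the snippet above and are not a published API; plain local paths would need a file:// prefix):

import arc

def _copy_file(source_url, dest_url, usercfg, log):
    '''Copy source_url to dest_url, returning the arc.DataStatus result.'''
    source = arc.datapoint_from_url(source_url, usercfg)
    destination = arc.datapoint_from_url(dest_url, usercfg)
    mover = arc.DataMover()
    mover.retry(False)  # fail fast; the caller decides what to do on error
    status = mover.Transfer(source, destination, arc.FileCache(), arc.URLMap())
    log.debug('copy {0} -> {1}: {2}'.format(source_url, dest_url, str(status)))
    return status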
Example #2
import arc
import sys

# Set up logging to stdout at VERBOSE level (a lot of output will be shown)
logstdout = arc.LogStream(sys.stdout)
logstdout.setFormat(arc.ShortFormat)
arc.Logger_getRootLogger().addDestination(logstdout)
arc.Logger_getRootLogger().setThreshold(arc.VERBOSE)
logger = arc.Logger(arc.Logger_getRootLogger(), "jobsubmit")

# UserConfig contains information on credentials and default services to use.
# This form of the constructor is necessary to initialise the local job list.
usercfg = arc.UserConfig("", "")

# Simple job description which outputs hostname to stdout
jobdescstring = "&(executable=/bin/hostname)(stdout=stdout)"

# Parse job description
jobdescs = arc.JobDescriptionList()
if not arc.JobDescription_Parse(jobdescstring, jobdescs):
    logger.msg(arc.ERROR, "Invalid job description")
    sys.exit(1)

# Use 'arc.JobDescription_ParseFromFile("helloworld.xrsl", jobdescs)'
# to parse job description from file.

# Use top-level NorduGrid information index to find resources
index = arc.Endpoint(
    "ldap://index1.nordugrid.org:2135/Mds-Vo-name=NorduGrid,o=grid",
    arc.Endpoint.REGISTRY, "org.nordugrid.ldapegiis")
services = arc.EndpointList(1, index)
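
The snippet ends after building the endpoint list. In the upstream ARC basic submission example the flow continues with service discovery followed by direct submission to the discovered targets; a sketch along those lines (this continuation is assumed, not part of the original snippet):

# Query the registry and collect execution targets
retriever = arc.ComputingServiceRetriever(usercfg, services)
retriever.wait()
targets = retriever.GetExecutionTargets()

# Try each target until a submission succeeds
job = arc.Job()
submitter = arc.Submitter(usercfg)
for target in targets:
    if submitter.Submit(target, jobdescs[0], job):
        sys.stdout.write("Job submitted with ID: %s\n" % str(job.JobID))
        break
else:
    logger.msg(arc.ERROR, "Job submission failed on all targets")
    sys.exit(1)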
Example #3
    def post_processing(self, workspec, jobspec_list, map_type):
        '''
        Fetch job output and process pilot info for sending in final heartbeat.
        The pilot pickle is loaded and some attributes corrected (schedulerid,
        pilotlog etc), then converted to dictionary and stored in
        workspec.workAttributes[pandaid]. If pilot pickle cannot be used,
        report ARC error in pilotErrorDiag and fill all possible attributes
        using ARC information.
        '''

        arclog = arc_utils.ARCLogger(baselogger, workspec.workerID)
        tmplog = arclog.log
        tmplog.info('Post processing ARC job {0}'.format(workspec.batchID))
        job = workspec.workAttributes['arcjob']
        arcid = job['JobID']
        tmplog.info('Job id {0}'.format(arcid))

        if 'arcdownloadfiles' not in workspec.workAttributes:
            tmplog.error('No files to download')
            return

        # Assume one-to-one mapping of workers to jobs. If jobspec_list is empty
        # it means the job was cancelled by panda or otherwise forgotten
        if not jobspec_list:
            return

        # Set certificate to use for interacting with ARC CE
        userconfig = arc.UserConfig(self.cred_type)
        if not self._setup_proxy(userconfig, workspec, arcid, tmplog):
            return

        queueconfigmapper = QueueConfigMapper()
        queueconfig = queueconfigmapper.get_queue(
            jobspec_list[0].computingSite)
        logbaseurl = queueconfig.submitter.get('logBaseURL')
        logbasedir = queueconfig.submitter.get('logDir', self.tmpdir)
        logsubdir = workspec.workAttributes['logsubdir']
        pandaid = str(jobspec_list[0].PandaID)

        # Construct log path and url
        logurl = '/'.join([logbaseurl, logsubdir,
                           str(pandaid)]) if logbaseurl else None
        logdir = os.path.join(logbasedir, logsubdir)

        # post_processing is only called once, so no retries are done. But keep
        # the possibility here in case it changes
        (fetched, notfetched, notfetchedretry) = self._download_outputs(
            workspec.workAttributes['arcdownloadfiles'], logdir, arcid,
            pandaid, userconfig, tmplog)
        if arcid not in fetched:
            tmplog.warning("Could not get outputs of {0}".format(arcid))

        workspec.workAttributes[long(pandaid)] = self._extractAndFixPilotPickle(
            job, pandaid, (arcid in fetched), logurl, tmplog)

        tmplog.debug("pilot info for {0}: {1}".format(
            pandaid, workspec.workAttributes[long(pandaid)]))
Example #4
#! /usr/bin/env python

from __future__ import print_function

import arc
import sys
root_logger = arc.Logger_getRootLogger()
root_logger.addDestination(arc.LogStream(sys.stdout))
root_logger.setThreshold(arc.ERROR)
if len(sys.argv) < 2:
    print("Usage: echo_client.py URL [message]")
    print(
        "  echo_client gets the credentials from the default user config file")
    sys.exit(-1)
url = arc.URL(sys.argv[1])
try:
    message = sys.argv[2]
except IndexError:
    message = 'hi!'
cfg = arc.MCCConfig()
uc = arc.UserConfig('')
uc.ApplyToConfig(cfg)
s = arc.ClientSOAP(cfg, url)
outpayload = arc.PayloadSOAP(
    arc.NS('echo', 'http://www.nordugrid.org/schemas/echo'))
outpayload.NewChild('echo:echo').NewChild('echo:say').Set(message)
resp, status = s.process(outpayload)
if not status:
    sys.stderr.write("SOAP request failed: %s\n" % str(status))
    sys.exit(1)
print(resp.GetXML(True))
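
Assuming an ARC HED container running the echo service, the client takes the service URL and an optional message, for example: python echo_client.py https://localhost:60000/Echo hello (the URL is a placeholder for a real deployment). Credentials are read from the default user configuration file, as the usage text says.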
Example #5
    def check_workers(self, workspec_list):
        retList = []
        for workspec in workspec_list:

            # make logger
            arclog = arc_utils.ARCLogger(baselogger, workspec.workerID)
            tmplog = arclog.log
            tmplog.info("checking worker id {0}".format(workspec.workerID))
            (job, modtime, proxyrole) = arc_utils.workspec2arcjob(workspec)

            # Set certificate
            userconfig = arc.UserConfig(self.cred_type)
            try:
                userconfig.ProxyPath(str(self.certs[proxyrole]))
            except:
                tmplog.error("Job {0}: no proxy found with role {1}".format(
                    job.JobID, proxyrole))
                retList.append((workspec.status, ''))
                continue

            job_supervisor = arc.JobSupervisor(userconfig, [job])
            job_supervisor.Update()

            jobsupdated = job_supervisor.GetAllJobs()
            jobsnotupdated = job_supervisor.GetIDsNotProcessed()

            for updatedjob in jobsupdated:
                if updatedjob.JobID in jobsnotupdated:
                    tmplog.error("Failed to find information on {0}".format(
                        updatedjob.JobID))
                    # If missing for too long (2 days), mark as lost
                    if arc.Time() - modtime > arc.Period(172800):
                        tmplog.error(
                            "Job {0} missing for more than 2 days, marking as lost"
                            .format(updatedjob.JobID))
                        retList.append((workspec.ST_failed, ''))
                    else:
                        retList.append((workspec.status, ''))
                    continue

                # Convert arc state to WorkSpec state
                arcstatus = updatedjob.State
                newstatus = WorkSpec.ST_submitted
                if arcstatus == arc.JobState.RUNNING or \
                   arcstatus == arc.JobState.FINISHING:
                    newstatus = WorkSpec.ST_running
                elif arcstatus == arc.JobState.FINISHED:
                    if updatedjob.ExitCode == -1:
                        # Missing exit code, but assume success
                        tmplog.warning(
                            "Job {0} FINISHED but has missing exit code, setting to zero"
                            .format(updatedjob.JobID))
                        updatedjob.ExitCode = 0
                    newstatus = WorkSpec.ST_finished
                elif arcstatus == arc.JobState.FAILED:
                    newstatus = WorkSpec.ST_failed
                    tmplog.info("Job {0} failed: {1}".format(
                        updatedjob.JobID,
                        ";".join([joberr for joberr in updatedjob.Error])))
                elif arcstatus == arc.JobState.KILLED:
                    newstatus = WorkSpec.ST_cancelled
                elif arcstatus == arc.JobState.DELETED or \
                     arcstatus == arc.JobState.OTHER:
                    # unexpected
                    newstatus = WorkSpec.ST_failed
                # Not covered: arc.JobState.HOLD. Maybe need a post-run state in
                # harvester, also to cover FINISHING

                # compare strings here to get around limitations of JobState API
                if (job.State.GetGeneralState() ==
                        updatedjob.State.GetGeneralState()):
                    tmplog.debug("Job {0} still in state {1}".format(
                        job.JobID, job.State.GetGeneralState()))
                    retList.append((newstatus, ''))
                    continue

                tmplog.info("Job {0}: {1} -> {2} ({3})".format(
                    job.JobID, job.State.GetGeneralState(),
                    updatedjob.State.GetGeneralState(),
                    updatedjob.State.GetSpecificState()))

                arc_utils.arcjob2workspec(updatedjob, workspec)
                # Have to force update to change info in DB
                workspec.force_update('workAttributes')
                tmplog.debug("batchStatus {0} -> workerStatus {1}".format(
                    arcstatus.GetGeneralState(), newstatus))
                retList.append((newstatus, ''))

        return True, retList
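
The JobSupervisor pattern above also works standalone. A minimal sketch with placeholder endpoints (the job ID and interface names are hypothetical; real values come from the persisted job record, as workspec2arcjob does above):

import arc

usercfg = arc.UserConfig('')

# Rebuild a job object from persisted information (placeholder endpoints)
job = arc.Job()
job.JobID = 'gsiftp://ce.example.org:2811/jobs/12345'
job.JobManagementURL = arc.URL('gsiftp://ce.example.org:2811/jobs')
job.JobManagementInterfaceName = 'org.nordugrid.gridftpjob'
job.JobStatusURL = arc.URL('ldap://ce.example.org:2135/o=grid')
job.JobStatusInterfaceName = 'org.nordugrid.ldapng'

# Query the CE and print the refreshed state
supervisor = arc.JobSupervisor(usercfg, [job])
supervisor.Update()
for j in supervisor.GetAllJobs():
    print('{0}: {1}'.format(j.JobID, j.State.GetGeneralState()))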
Example #6
import arc
import sys

if len(sys.argv) != 2:
    sys.stdout.write("Usage: python partial_copy.py filename\n")
    sys.exit(1)

desired_size = 512
usercfg = arc.UserConfig()
url = arc.URL(sys.argv[1])
handle = arc.DataHandle(url, usercfg)
point = handle.__ref__()
# GridFTP servers generally do not have an encrypted data channel
point.SetSecure(False)
info = arc.FileInfo("")
point.Stat(info)
sys.stdout.write("Name: %s\n" % str(info.GetName()))
fsize = info.GetSize()
if fsize > desired_size:
    point.Range(fsize - desired_size, fsize - 1)
databuffer = arc.DataBuffer()
point.StartReading(databuffer)
while True:
    (r, n, length, offset, buf) = databuffer.for_write(True)
    if not r:
        break
    sys.stdout.write("BUFFER: %d :  %d  : %s\n" % (offset, length, str(buf)))
    databuffer.is_written(n)
point.StopReading()
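
Invoked as, for example, python partial_copy.py gsiftp://example.org/path/file (a placeholder URL), the script stats the remote file and prints its last 512 bytes as they arrive, one buffer at a time.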
Example #7
    def setUp(self):
        self.usercfg = arc.UserConfig(
            arc.initializeCredentialsType(
                arc.initializeCredentialsType.SkipCredentials))
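
In context this is a unittest fixture; SkipCredentials lets the tests construct a UserConfig without a valid grid proxy. A self-contained sketch (the test class and assertion are illustrative, not from the original):

import unittest
import arc

class UserConfigTest(unittest.TestCase):
    def setUp(self):
        # Skip credential checks so no proxy is needed to run the tests
        self.usercfg = arc.UserConfig(
            arc.initializeCredentialsType(
                arc.initializeCredentialsType.SkipCredentials))

    def test_construct(self):
        # Only verifies that construction succeeded
        self.assertIsNotNone(self.usercfg)

if __name__ == '__main__':
    unittest.main()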
Example #8
    def submit_workers(self, workspec_list):
        retlist = []

        # Get queue info from DB
        pandaqueues = self.dbproxy.get_cache("panda_queues.json", None)
        if pandaqueues is None:
            raise Exception("Failed to get panda queue info from database")
        pandaqueues = pandaqueues.data

        osmap = self.dbproxy.get_cache("ddmendpoints_objectstores.json", None)
        if osmap is None:
            raise Exception("Failed to get Object Store info from database")
        osmap = osmap.data

        for workspec in workspec_list:

            arclog = arc_utils.ARCLogger(baselogger, workspec.workerID)
            tmplog = arclog.log

            # Assume for aCT that jobs are always pre-fetched (no late-binding)
            for jobspec in workspec.get_jobspec_list():

                tmplog.debug("JobSpec: {0}".format(jobspec.values_map()))

                if jobspec.computingSite not in pandaqueues:
                    retlist.append(
                        (False, "No queue information for {0}".format(
                            jobspec.computingSite)))
                    continue

                # Get CEs from panda queue info
                # List of (endpoint, queue) tuples
                arcces = []
                for endpoint in pandaqueues[jobspec.computingSite]['queues']:
                    ce_endpoint = endpoint['ce_endpoint']
                    if not re.search('://', ce_endpoint):
                        ce_endpoint = 'gsiftp://%s' % ce_endpoint
                    ce_queue = endpoint['ce_queue_name']
                    arcces.append((ce_endpoint, ce_queue))

                if not arcces:
                    retlist.append((False, "No CEs defined for %{0}".format(
                        jobspec.computingSite)))
                    continue

                # Set true pilot or not
                queueconfigmapper = QueueConfigMapper()
                queueconfig = queueconfigmapper.get_queue(
                    jobspec.computingSite)
                pandaqueues[
                    jobspec.computingSite]['truepilot'] = queueconfig.truePilot

                # Set log URL for GTAG env in job description
                logbaseurl = queueconfig.submitter.get('logBaseURL')
                logsubdir = self._set_logdir(jobspec.computingSite)
                logfileurl = '/'.join(
                    [logbaseurl, logsubdir,
                     '%d.out' % jobspec.PandaID]) if logbaseurl else None

                tmplog.debug("Converting to ARC XRSL format")
                arcxrsl = ARCParser(
                    jobspec.jobParams,
                    jobspec.computingSite,
                    pandaqueues[jobspec.computingSite],
                    logfileurl,
                    self.schedulerid,
                    osmap,
                    '/tmp',  # tmpdir, TODO common tmp dir
                    None,  #jobSpec.eventranges, # TODO event ranges
                    tmplog)
                arcxrsl.parse()
                xrsl = arcxrsl.getXrsl()
                tmplog.debug("ARC xrsl: {0}".format(xrsl))

                # Set the files to be downloaded at the end of the job
                downloadfiles = 'gmlog/errors'
                if 'logFile' in jobspec.jobParams:
                    downloadfiles += ';%s' % jobspec.jobParams[
                        'logFile'].replace('.tgz', '')
                if not pandaqueues[jobspec.computingSite]['truepilot']:
                    downloadfiles += ';jobSmallFiles.tgz'

                # Set certificate
                userconfig = arc.UserConfig(self.cred_type)
                proxyrole = ''
                if jobspec.jobParams['prodSourceLabel'] == 'user':
                    userconfig.ProxyPath(str(self.certs['pilot']))
                    proxyrole = 'pilot'
                else:
                    userconfig.ProxyPath(str(self.certs['production']))
                    proxyrole = 'production'
                tmplog.debug("Submitting using {0} proxy at {1}".format(
                    proxyrole, userconfig.ProxyPath()))

                try:
                    tmplog.debug("Submission targets: {0}".format(arcces))
                    arcjob = self._arc_submit(xrsl, arcces, userconfig, tmplog)
                    tmplog.info("ARC CE job id {0}".format(arcjob.JobID))
                    arc_utils.arcjob2workspec(arcjob, workspec)
                    workspec.workAttributes['arcdownloadfiles'] = downloadfiles
                    workspec.workAttributes['proxyrole'] = proxyrole
                    workspec.workAttributes['logsubdir'] = logsubdir
                    workspec.batchID = arcjob.JobID
                    tmplog.debug(workspec.workAttributes)
                    result = (True, '')
                except Exception as exc:
                    tmplog.error(traceback.format_exc())
                    result = (False,
                              "Failed to submit ARC job: {0}".format(str(exc)))

                retlist.append(result)

        return retlist