Exemplo n.º 1
0
    def updateMonitoringInformation(jobs):
        '''Monitoring loop for normal jobs.

        Queries the status of every job that has a backend id (batched by
        credential requirement), copies the reported details onto
        ``job.backend``, downloads the output sandbox of jobs reported as
        ``DONE-OK`` or ``DONE-FAILED`` and finally purges the jobs whose
        output was downloaded.

        Credential requirements that have no valid credential in
        ``credential_store`` are added to ``needed_credentials`` and the
        jobs depending on them are skipped.

        Args:
            jobs: iterable of job objects exposing ``backend.id``,
                ``backend.status`` and ``backend.credential_requirements``.
        '''

        # Jobs without a backend id cannot be queried, so they are kept out
        # of both the lookup table and the per-credential batches.
        jobdict = dict((job.backend.id, job) for job in jobs
                       if job.backend.id)

        # Group the backend ids by the backend's credential requirements
        cred_to_backend_id_list = defaultdict(list)
        for backend_id, job in jobdict.items():
            cred_to_backend_id_list[
                job.backend.credential_requirements].append(backend_id)

        # Batch the status requests by credential requirement
        jobInfoDict = {}
        for cred_req, job_ids in cred_to_backend_id_list.items():
            # If the credential is not valid or doesn't exist then skip it
            cred = credential_store.get(cred_req)
            if not cred or not cred.is_valid():
                needed_credentials.add(cred_req)
                continue
            jobInfoDict.update(Grid.cream_status(job_ids, cred_req))

        jidListForPurge = []

        # update job information for those available in jobInfoDict
        for backend_id, info in jobInfoDict.items():

            job = jobdict.get(backend_id)
            if job is None:
                # The status query reported an id that was not requested;
                # skip it instead of aborting the whole loop on a KeyError.
                logger.warning(
                    'ignoring status of unknown job id: %s' % backend_id)
                continue

            if not info:
                logger.warning('fail to retrieve job informaton: %s' %
                               job.getFQID('.'))
                continue

            new_status = info['Current Status']

            # no need to update anything if backend status is not changed
            if new_status == job.backend.status:
                continue

            # An exit code that is present but not numeric means the job is
            # left untouched for this round.
            # NOTE(review): presumably a placeholder such as "W" while the
            # exit code is not final yet -- confirm.
            if 'ExitCode' in info and not info['ExitCode'].isdigit():
                continue

            if 'Worker Node' in info:
                job.backend.workernode = info['Worker Node']

            if 'CREAM ISB URI' in info:
                job.backend.isbURI = info['CREAM ISB URI']

            if 'CREAM OSB URI' in info:
                job.backend.osbURI = info['CREAM OSB URI']

            # download output sandboxes if final status is reached
            if new_status in ('DONE-OK', 'DONE-FAILED'):

                # resolve output sandbox URIs based on the JDL information
                osbURIList = __cream_resolveOSBList__(job, info['JDL'])

                logger.debug('OSB list:')
                for uri in osbURIList:
                    logger.debug(uri)

                if osbURIList:
                    output_dir = job.getOutputWorkspace(
                        create=True).getPath()

                    if Grid.cream_get_output(
                            osbURIList,
                            output_dir,
                            job.backend.credential_requirements):
                        _, app_exitcode = Grid.__get_app_exitcode__(
                            output_dir)
                        job.backend.exitcode = app_exitcode

                        # only purge jobs whose output is safely local
                        jidListForPurge.append(job.backend.id)
                    else:
                        logger.error(
                            'fail to download job output: %s' %
                            job.getFQID('.'))

            # The status is known to have changed at this point, so the
            # backend (and then the Ganga job) is always updated.
            job.backend.status = new_status

            if 'ExitCode' in info:
                # isdigit() was checked above, but it accepts some characters
                # int() rejects, hence the fallback value.
                try:
                    cream_exitcode = int(info['ExitCode'])
                except (TypeError, ValueError):
                    cream_exitcode = 1
                job.backend.exitcode_cream = cream_exitcode

            if 'FailureReason' in info:
                # Kept best-effort as before, but no longer silent.
                try:
                    job.backend.reason = info['FailureReason']
                except Exception as err:
                    logger.debug(
                        'could not record failure reason for %s: %s' %
                        (job.getFQID('.'), err))

            job.backend.updateGangaJobStatus()

        # purging the jobs the output has been fetched locally
        if jidListForPurge:
            fetched_ids = set(jidListForPurge)
            for cred_req, job_ids in cred_to_backend_id_list.items():
                ids_to_purge = fetched_ids.intersection(job_ids)
                # skip credential groups with nothing to purge
                if ids_to_purge:
                    Grid.cream_purge_multiple(ids_to_purge, cred_req)
Exemplo n.º 2
0
    def updateMonitoringInformation(jobs):
        '''Monitoring loop for normal jobs.

        Queries the status of every job that has a backend id, copies the
        reported details onto ``job.backend``, downloads the output sandbox
        of jobs reported as ``DONE-OK`` or ``DONE-FAILED`` and, once all
        jobs have been processed, purges the jobs whose output was
        downloaded.

        Args:
            jobs: iterable of job objects exposing ``backend.id`` and
                ``backend.status``.
        '''

        # Jobs without a backend id cannot be queried, so they are left out.
        jobdict = dict((job.backend.id, job) for job in jobs
                       if job.backend.id)

        jobInfoDict = Grid.cream_status(list(jobdict))

        jidListForPurge = []

        # update job information for those available in jobInfoDict
        for backend_id, info in jobInfoDict.items():

            job = jobdict.get(backend_id)
            if job is None:
                # The status query reported an id that was not requested;
                # skip it instead of aborting the whole loop on a KeyError.
                logger.warning(
                    'ignoring status of unknown job id: %s' % backend_id)
                continue

            if not info:
                logger.warning(
                    'fail to retrieve job informaton: %s' % job.getFQID('.'))
                continue

            new_status = info['Current Status']

            # no need to update anything if backend status is not changed
            if new_status == job.backend.status:
                continue

            # An exit code that is present but not numeric means the job is
            # left untouched for this round.
            # NOTE(review): presumably a placeholder such as "W" while the
            # exit code is not final yet -- confirm.
            if 'ExitCode' in info and not info['ExitCode'].isdigit():
                continue

            if 'Worker Node' in info:
                job.backend.workernode = info['Worker Node']

            if 'CREAM ISB URI' in info:
                job.backend.isbURI = info['CREAM ISB URI']

            if 'CREAM OSB URI' in info:
                job.backend.osbURI = info['CREAM OSB URI']

            # download output sandboxes if final status is reached
            if new_status in ('DONE-OK', 'DONE-FAILED'):

                # resolve output sandbox URIs based on the JDL information
                osbURIList = __cream_resolveOSBList__(job, info['JDL'])

                logger.debug('OSB list:')
                for uri in osbURIList:
                    logger.debug(uri)

                if osbURIList:
                    output_dir = job.getOutputWorkspace(
                        create=True).getPath()

                    if Grid.cream_get_output(osbURIList, output_dir):
                        _, app_exitcode = Grid.__get_app_exitcode__(
                            output_dir)
                        job.backend.exitcode = app_exitcode

                        # only purge jobs whose output is safely local
                        jidListForPurge.append(job.backend.id)
                    else:
                        logger.error(
                            'fail to download job output: %s' %
                            job.getFQID('.'))

            # The status is known to have changed at this point, so the
            # backend (and then the Ganga job) is always updated.
            job.backend.status = new_status

            if 'ExitCode' in info:
                # isdigit() was checked above, but it accepts some characters
                # int() rejects, hence the fallback value.
                try:
                    cream_exitcode = int(info['ExitCode'])
                except (TypeError, ValueError):
                    cream_exitcode = 1
                job.backend.exitcode_cream = cream_exitcode

            if 'FailureReason' in info:
                # Kept best-effort as before, but no longer silent.
                try:
                    job.backend.reason = info['FailureReason']
                except Exception as err:
                    logger.debug(
                        'could not record failure reason for %s: %s' %
                        (job.getFQID('.'), err))

            job.backend.updateGangaJobStatus()

        # purging the jobs the output has been fetched locally; done once
        # after the loop so that no job is purged more than once per call
        if jidListForPurge:
            Grid.cream_purgeMultiple(jidListForPurge)
Exemplo n.º 3
0
    def updateMonitoringInformation(jobs):
        '''Monitoring loop for normal jobs.

        Queries the status of every job that has a backend id, copies the
        reported details onto ``job.backend``, downloads the output sandbox
        of jobs reported as ``DONE-OK`` or ``DONE-FAILED`` and, once all
        jobs have been processed, purges the jobs whose output was
        downloaded.

        Args:
            jobs: iterable of job objects exposing ``backend.id`` and
                ``backend.status``.
        '''

        # Jobs without a backend id cannot be queried, so they are left out.
        jobdict = dict((job.backend.id, job) for job in jobs
                       if job.backend.id)

        jobInfoDict = Grid.cream_status(list(jobdict))

        jidListForPurge = []

        # update job information for those available in jobInfoDict
        for backend_id, info in jobInfoDict.items():

            job = jobdict.get(backend_id)
            if job is None:
                # The status query reported an id that was not requested;
                # skip it instead of aborting the whole loop on a KeyError.
                logger.warning(
                    'ignoring status of unknown job id: %s' % backend_id)
                continue

            if not info:
                logger.warning(
                    'fail to retrieve job informaton: %s' % job.getFQID('.'))
                continue

            new_status = info['Current Status']

            # no need to update anything if backend status is not changed
            if new_status == job.backend.status:
                continue

            # An exit code that is present but not numeric means the job is
            # left untouched for this round.
            # NOTE(review): presumably a placeholder such as "W" while the
            # exit code is not final yet -- confirm.
            if 'ExitCode' in info and not info['ExitCode'].isdigit():
                continue

            if 'Worker Node' in info:
                job.backend.workernode = info['Worker Node']

            if 'CREAM ISB URI' in info:
                job.backend.isbURI = info['CREAM ISB URI']

            if 'CREAM OSB URI' in info:
                job.backend.osbURI = info['CREAM OSB URI']

            # download output sandboxes if final status is reached
            if new_status in ('DONE-OK', 'DONE-FAILED'):

                # resolve output sandbox URIs based on the JDL information
                osbURIList = __cream_resolveOSBList__(job, info['JDL'])

                logger.debug('OSB list:')
                for uri in osbURIList:
                    logger.debug(uri)

                if osbURIList:
                    output_dir = job.getOutputWorkspace(
                        create=True).getPath()

                    if Grid.cream_get_output(osbURIList, output_dir):
                        _, app_exitcode = Grid.__get_app_exitcode__(
                            output_dir)
                        job.backend.exitcode = app_exitcode

                        # only purge jobs whose output is safely local
                        jidListForPurge.append(job.backend.id)
                    else:
                        logger.error(
                            'fail to download job output: %s' %
                            job.getFQID('.'))

            # The status is known to have changed at this point, so the
            # backend (and then the Ganga job) is always updated.
            job.backend.status = new_status

            if 'ExitCode' in info:
                # isdigit() was checked above, but it accepts some characters
                # int() rejects, hence the fallback value.
                try:
                    cream_exitcode = int(info['ExitCode'])
                except (TypeError, ValueError):
                    cream_exitcode = 1
                job.backend.exitcode_cream = cream_exitcode

            if 'FailureReason' in info:
                # Kept best-effort as before, but no longer silent.
                try:
                    job.backend.reason = info['FailureReason']
                except Exception as err:
                    logger.debug(
                        'could not record failure reason for %s: %s' %
                        (job.getFQID('.'), err))

            job.backend.updateGangaJobStatus()

        # purging the jobs the output has been fetched locally; done once
        # after the loop so that no job is purged more than once per call
        if jidListForPurge:
            Grid.cream_purgeMultiple(jidListForPurge)
Exemplo n.º 4
0
    def updateMonitoringInformation(jobs):
        '''Monitoring loop for normal jobs.

        Queries the status of every job that has a backend id (batched by
        credential requirement), copies the reported details onto
        ``job.backend``, downloads the output sandbox of jobs reported as
        ``DONE-OK`` or ``DONE-FAILED`` and finally purges the jobs whose
        output was downloaded.

        Credential requirements that have no valid credential in
        ``credential_store`` are added to ``needed_credentials`` and the
        jobs depending on them are skipped.

        Args:
            jobs: iterable of job objects exposing ``backend.id``,
                ``backend.status`` and ``backend.credential_requirements``.
        '''

        # Jobs without a backend id cannot be queried, so they are kept out
        # of both the lookup table and the per-credential batches.
        jobdict = dict((job.backend.id, job) for job in jobs
                       if job.backend.id)

        # Group the backend ids by the backend's credential requirements
        cred_to_backend_id_list = defaultdict(list)
        for backend_id, job in jobdict.items():
            cred_to_backend_id_list[
                job.backend.credential_requirements].append(backend_id)

        # Batch the status requests by credential requirement
        jobInfoDict = {}
        for cred_req, job_ids in cred_to_backend_id_list.items():
            # If the credential is not valid or doesn't exist then skip it
            cred = credential_store.get(cred_req)
            if not cred or not cred.is_valid():
                needed_credentials.add(cred_req)
                continue
            jobInfoDict.update(Grid.cream_status(job_ids, cred_req))

        jidListForPurge = []

        # update job information for those available in jobInfoDict
        for backend_id, info in jobInfoDict.items():

            job = jobdict.get(backend_id)
            if job is None:
                # The status query reported an id that was not requested;
                # skip it instead of aborting the whole loop on a KeyError.
                logger.warning(
                    'ignoring status of unknown job id: %s' % backend_id)
                continue

            if not info:
                logger.warning(
                    'fail to retrieve job informaton: %s' % job.getFQID('.'))
                continue

            new_status = info['Current Status']

            # no need to update anything if backend status is not changed
            if new_status == job.backend.status:
                continue

            # An exit code that is present but not numeric means the job is
            # left untouched for this round.
            # NOTE(review): presumably a placeholder such as "W" while the
            # exit code is not final yet -- confirm.
            if 'ExitCode' in info and not info['ExitCode'].isdigit():
                continue

            if 'Worker Node' in info:
                job.backend.workernode = info['Worker Node']

            if 'CREAM ISB URI' in info:
                job.backend.isbURI = info['CREAM ISB URI']

            if 'CREAM OSB URI' in info:
                job.backend.osbURI = info['CREAM OSB URI']

            # download output sandboxes if final status is reached
            if new_status in ('DONE-OK', 'DONE-FAILED'):

                # resolve output sandbox URIs based on the JDL information
                osbURIList = __cream_resolveOSBList__(job, info['JDL'])

                logger.debug('OSB list:')
                for uri in osbURIList:
                    logger.debug(uri)

                if osbURIList:
                    output_dir = job.getOutputWorkspace(
                        create=True).getPath()

                    if Grid.cream_get_output(
                            osbURIList,
                            output_dir,
                            job.backend.credential_requirements):
                        _, app_exitcode = Grid.__get_app_exitcode__(
                            output_dir)
                        job.backend.exitcode = app_exitcode

                        # only purge jobs whose output is safely local
                        jidListForPurge.append(job.backend.id)
                    else:
                        logger.error(
                            'fail to download job output: %s' %
                            job.getFQID('.'))

            # The status is known to have changed at this point, so the
            # backend (and then the Ganga job) is always updated.
            job.backend.status = new_status

            if 'ExitCode' in info:
                # isdigit() was checked above, but it accepts some characters
                # int() rejects, hence the fallback value.
                try:
                    cream_exitcode = int(info['ExitCode'])
                except (TypeError, ValueError):
                    cream_exitcode = 1
                job.backend.exitcode_cream = cream_exitcode

            if 'FailureReason' in info:
                # Kept best-effort as before, but no longer silent.
                try:
                    job.backend.reason = info['FailureReason']
                except Exception as err:
                    logger.debug(
                        'could not record failure reason for %s: %s' %
                        (job.getFQID('.'), err))

            job.backend.updateGangaJobStatus()

        # purging the jobs the output has been fetched locally
        if jidListForPurge:
            fetched_ids = set(jidListForPurge)
            for cred_req, job_ids in cred_to_backend_id_list.items():
                ids_to_purge = fetched_ids.intersection(job_ids)
                # skip credential groups with nothing to purge
                if ids_to_purge:
                    Grid.cream_purge_multiple(ids_to_purge, cred_req)