示例#1
0
class InputDataValidation( OptimizerExecutor ):
  """
      The specific Optimizer must provide the following methods:
      - initializeOptimizer() before each execution cycle
      - checkJob() - the main method called for each job
  """

  @classmethod
  def initializeOptimizer( cls ):
    """ Initialization of the Agent.

        Seeds the random generator, creates the two class-level DictCache
        instances, connects to the JobDB, creates the SiteStatus client and
        sets the "FailedStatus" option.

        :return: S_OK() on success, S_ERROR if JobDB cannot be imported or if
                 creating the JobDB instance raises RuntimeError
    """
    random.seed()
    cls.__SEStatus = DictCache.DictCache()
    cls.__sitesForSE = DictCache.DictCache()
    # JobDB is imported here, at initialization time, so that an import
    # problem is reported as an S_ERROR instead of an exception
    try:
      from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB
    except ImportError as excp:
      # "except ... as" is accepted by Python 2.6+ and Python 3 alike
      return S_ERROR( "Could not import JobDB: %s" % str( excp ) )

    try:
      cls.__jobDB = JobDB()
    except RuntimeError:
      return S_ERROR( "Cannot connect to JobDB" )

    cls.__siteStatus = SiteStatus()

    cls.ex_setOption( "FailedStatus", "Input Data Not Available" )
    return S_OK()
示例#2
0
    def __init__(self,
                 pilotAgentsDB=None,
                 jobDB=None,
                 tqDB=None,
                 jlDB=None,
                 opsHelper=None):
        """Set up the database handles, the helpers and the logger.

        Every argument is optional: when a falsy value is passed, a default
        object of the corresponding type is instantiated here.

        :param pilotAgentsDB: object to use instead of a new PilotAgentsDB()
        :param jobDB: object to use instead of a new JobDB()
        :param tqDB: object to use instead of a new TaskQueueDB()
        :param jlDB: object to use instead of a new JobLoggingDB()
        :param opsHelper: object to use instead of a new Operations()
        """
        # Injected collaborators win; defaults are only built when needed
        self.pilotAgentsDB = pilotAgentsDB if pilotAgentsDB else PilotAgentsDB()
        self.jobDB = jobDB if jobDB else JobDB()
        self.tqDB = tqDB if tqDB else TaskQueueDB()
        self.jlDB = jlDB if jlDB else JobLoggingDB()
        self.opsHelper = opsHelper if opsHelper else Operations()

        self.log = gLogger.getSubLogger("Matcher")

        # The limiter shares the job DB and operations helper chosen above
        self.limiter = Limiter(jobDB=self.jobDB, opsHelper=self.opsHelper)

        self.siteClient = SiteStatus()
示例#3
0
 def initialize(self):
     """Set the default agent options and the initial state.

     :return: S_OK()
     """
     self.am_setOption("PollingTime", 60.0)
     self.am_setOption("maxPilotWaitingHours", 6)

     self.queueDict = dict()
     # Limits taken from the module-level defaults
     self.maxJobsInFillMode, self.maxPilotsToSubmit = (
         MAX_JOBS_IN_FILLMODE, MAX_PILOTS_TO_SUBMIT)

     self.siteStatus = SiteStatus()
     return S_OK()
示例#4
0
 def _updateSiteMask(self, sitesData):
     """Tag every site record with its current mask status.

     The 'siteMaskStatus' key of each per-site record is set to 'Allowed'
     when SiteStatus reports the site as usable for 'ComputingAccess' and
     to 'Banned' otherwise. The records of ``sitesData`` are updated in
     place.

     :param sitesData: per-site records indexed by site name
     :return: S_OK(sitesData)
     """
     statusClient = SiteStatus()
     # Shallow copy: the per-site records are shared with sitesData
     maskCopy = dict(sitesData)
     for siteName, siteRecord in maskCopy.items():
         # FIXME: we are only taking into account ComputingAccess
         # NOTE(review): relies on isUsableSite returning a plain
         # truthy/falsy value - confirm against the SiteStatus API in use
         usable = statusClient.isUsableSite(siteName, 'ComputingAccess')
         siteRecord['siteMaskStatus'] = 'Allowed' if usable else 'Banned'
         sitesData[siteName]['siteMaskStatus'] = siteRecord['siteMaskStatus']
     return S_OK(sitesData)
示例#5
0
    def initialize(self):
        """Create the thread pool and register the clients.

        The pool size comes from the 'maxNumberOfThreads' option and is used
        for both ThreadPool arguments.

        :return: S_OK()
        """
        threadCount = self.am_getOption('maxNumberOfThreads',
                                        self.__maxNumberOfThreads)
        self.threadPool = ThreadPool(threadCount, threadCount)

        # The same SiteStatus instance is kept as attribute and in the
        # clients dictionary
        self.siteClient = SiteStatus()
        self.clients['SiteStatus'] = self.siteClient
        self.clients['ResourceManagementClient'] = ResourceManagementClient()

        return S_OK()
示例#6
0
    def __init__(self):
        """Internal initialization of the DIRAC Admin API.

        Creates the CSAPI and SiteStatus clients and reads the debug flag
        and scratch directory from the configuration.
        """
        super(DiracAdmin, self).__init__()

        self.csAPI = CSAPI()

        # Debug mode is on when the configured log level is "DEBUG", which
        # is also the default used when the option is not set
        logLevel = gConfig.getValue(self.section + "/LogLevel", "DEBUG")
        self.dbg = logLevel == "DEBUG"

        scratchOption = self.section + "/ScratchDir"
        self.scratchDir = gConfig.getValue(scratchOption, "/tmp")
        self.currentDir = os.getcwd()
        self.rssFlag = ResourceStatus().rssFlag
        self.sitestatus = SiteStatus()
示例#7
0
    def __init__(self):
        """Internal initialization of the DIRAC Admin API.

        Creates the CSAPI and SiteStatus clients, reads the debug flag and
        scratch directory from the configuration and caches the set of
        sites returned by getSites().
        """
        super(DiracAdmin, self).__init__()

        self.csAPI = CSAPI()

        # Debug mode is on when the configured log level is 'DEBUG', which
        # is also the default used when the option is not set
        logLevel = gConfig.getValue(self.section + '/LogLevel', 'DEBUG')
        self.dbg = logLevel == 'DEBUG'

        scratchOption = self.section + '/ScratchDir'
        self.scratchDir = gConfig.getValue(scratchOption, '/tmp')
        self.currentDir = os.getcwd()
        self.rssFlag = ResourceStatus().rssFlag
        self.sitestatus = SiteStatus()
        # An empty set is used when the getSites() result has no 'Value'
        knownSites = getSites().get('Value', [])
        self._siteSet = set(knownSites)
示例#8
0
    def printCEInfo(voName):
        """Log a table with the CEs and queues available to a VO.

        One row is produced per queue; the site name is shown only on the
        first row of a site, and the CE name and type only on the first row
        of a CE.

        :param voName: community passed to Resources.getQueues
        :return: S_OK() after logging the table, or the failed result of
                 SiteStatus().getUsableSites()
        """
        queuesResult = Resources.getQueues(community=voName)
        if not queuesResult["OK"]:
            gLogger.error("Failed to get CE information")
            DIRACExit(-1)

        header = ("Site", "CE", "CEType", "Queue", "Status")
        rows = []

        # get list of usable sites within this cycle
        maskResult = SiteStatus().getUsableSites()
        if not maskResult["OK"]:
            return maskResult
        usableSites = maskResult.get("Value", [])

        rssClient = ResourceStatus()

        for site, ceDict in queuesResult["Value"].items():
            siteStatus = "InActive"
            if site in usableSites:
                siteStatus = "Active"
            firstOfSite = True
            for ce, ceInfo in ceDict.items():

                # The CE inherits the site status unless RSS is enabled and
                # provides a status of its own
                ceStatus = siteStatus
                if rssClient.rssFlag:
                    statusResult = rssClient.getElementStatus(
                        ce, "ComputingElement")
                    if statusResult["OK"]:
                        ceStatus = statusResult["Value"][ce]["all"]

                firstOfCE = True
                for queue in ceInfo["Queues"]:
                    if firstOfCE:
                        ceColumn, typeColumn = ce, ceInfo["CEType"]
                    else:
                        ceColumn, typeColumn = "", ""
                    siteColumn = site if firstOfSite else ""
                    rows.append(
                        (siteColumn, ceColumn, typeColumn, queue, ceStatus))
                    firstOfCE = firstOfSite = False

        gLogger.notice(
            printTable(header, rows, printOut=False, columnSeparator="  "))
        return S_OK()
示例#9
0
def printCEInfo(voName):
    """Log a table with the CEs and queues available to a VO.

    One row is produced per queue; the site name is shown only on the first
    row of a site, and the CE name and type only on the first row of a CE.

    :param voName: community passed to Resources.getQueues
    :return: S_OK() after logging the table, or the failed result of
             SiteStatus().getUsableSites()
    """
    queuesResult = Resources.getQueues(community=voName)
    if not queuesResult['OK']:
        gLogger.error('Failed to get CE information')
        DIRACExit(-1)

    header = ("Site", 'CE', 'CEType', 'Queue', 'Status')
    rows = []

    # get list of usable sites within this cycle
    maskResult = SiteStatus().getUsableSites()
    if not maskResult['OK']:
        return maskResult
    usableSites = maskResult.get('Value', [])

    rssClient = ResourceStatus()

    for site, ceDict in queuesResult['Value'].items():
        siteStatus = "InActive"
        if site in usableSites:
            siteStatus = "Active"
        firstOfSite = True
        for ce, ceInfo in ceDict.items():

            # The CE inherits the site status unless RSS is enabled and
            # provides a status of its own
            ceStatus = siteStatus
            if rssClient.rssFlag:
                statusResult = rssClient.getElementStatus(
                    ce, "ComputingElement")
                if statusResult['OK']:
                    ceStatus = statusResult['Value'][ce]['all']

            firstOfCE = True
            for queue in ceInfo['Queues']:
                if firstOfCE:
                    ceColumn, typeColumn = ce, ceInfo['CEType']
                else:
                    ceColumn, typeColumn = '', ''
                siteColumn = site if firstOfSite else ''
                rows.append(
                    (siteColumn, ceColumn, typeColumn, queue, ceStatus))
                firstOfCE = firstOfSite = False

    gLogger.notice(
        printTable(header, rows, printOut=False, columnSeparator='  '))
    return S_OK()
示例#10
0
  def __init__( self ):
    """
    Constructor: creates the SiteStatus client and the element caches.

    examples
      >>> resourceStatus = ResourceStatus()
    """

    super( ResourceStatus, self ).__init__()

    self.siteStatus = SiteStatus()

    # The cache lifetime is read from RssConfiguration so that it can be tuned
    lifetime = int( RssConfiguration().getConfigCache() )

    # One RSSCache per element type ( Storage, Computing ), each with its
    # own update callback.
    # Should be generated on the fly, instead of being hardcoded ?
    self.seCache = RSSCache( 'Storage', lifetime, self._updateSECache )
    self.ceCache = RSSCache( 'Computing', lifetime, self._updateCECache )
示例#11
0
  def __init__( self, submitPool ):
    """
     Set up the logger and the default settings of the director.

     :param submitPool: submit pool name; it is appended to the logger name
                        when it differs from gridMiddleware and is used to
                        build the SubmitPool pilot option
    """

    # Pick the logger name first, then create the sub-logger once
    if submitPool == self.gridMiddleware:
      loggerName = '%sPilotDirector' % self.gridMiddleware
    else:
      loggerName = '%sPilotDirector/%s' % ( self.gridMiddleware, submitPool )
    self.log = gLogger.getSubLogger( loggerName )

    # Pilot script and installation defaults
    self.pilot = DIRAC_PILOT
    self.submitPoolOption = '-o /Resources/Computing/CEDefaults/SubmitPool=%s' % submitPool
    self.extraPilotOptions = []
    self.installVersion = DIRAC_VERSION
    self.installProject = DIRAC_PROJECT
    self.installation = DIRAC_INSTALLATION
    self.pilotExtensionsList = []

    self.virtualOrganization = VIRTUAL_ORGANIZATION
    self.install = DIRAC_INSTALL
    self.extraModules = DIRAC_MODULES
    self.maxJobsInFillMode = MAX_JOBS_IN_FILLMODE
    self.targetGrids = [ self.gridMiddleware ]

    # List-match settings
    self.enableListMatch = ENABLE_LISTMATCH
    self.listMatchDelay = LISTMATCH_DELAY
    self.listMatchCache = DictCache()

    self.privatePilotFraction = PRIVATE_PILOT_FRACTION

    # Error reporting settings
    self.errorClearTime = ERROR_CLEAR_TIME
    self.errorTicketTime = ERROR_TICKET_TIME
    self.errorMailAddress = DIRAC.errorMail
    self.alarmMailAddress = DIRAC.alarmMail
    self.mailFromAddress = FROM_MAIL

    self.siteClient = SiteStatus()

    # Fallback logger in case no 'log' instance attribute exists at this point
    if 'log' not in self.__dict__:
      self.log = gLogger.getSubLogger( 'PilotDirector' )
    self.log.info( 'Initialized' )
示例#12
0
    def __checkSitesInMask(self, job, siteCandidates):
        """Filter the candidate sites, keeping those in the current mask.

        :param job: job identifier, only used in the log messages
        :param siteCandidates: iterable of candidate site names
        :return: S_OK(list of candidates usable for 'ComputingAccess'), or
                 S_ERROR if the usable sites cannot be obtained
        """

        maskResult = SiteStatus().getUsableSites('ComputingAccess')
        if not maskResult['OK']:
            return S_ERROR('Could not get site mask')
        maskedIn = maskResult['Value']

        accepted = []
        for site in siteCandidates:
            if site in maskedIn:
                accepted.append(site)
                continue
            # Rejected candidates are only reported at verbose level
            self.log.verbose(
                '%s is a candidate site for job %s but not in mask' %
                (site, job))

        self.log.info('Candidate sites in Mask are %s' % (accepted))

        return S_OK(accepted)
示例#13
0
    def getSiteMask(self, printOutput=False):
        """Retrieve the sites currently usable for 'ComputingAccess'.

        Example usage:

        >>> print(diracAdmin.getSiteMask())
        {'OK': True, 'Value': [<site names>]}

        :param bool printOutput: when True and the query succeeded, the site
                                 names are sorted and printed one per line
        :returns: the S_OK / S_ERROR structure returned by
                  SiteStatus().getUsableSites('ComputingAccess')

        """

        result = SiteStatus().getUsableSites('ComputingAccess')
        if result['OK'] and printOutput:
            sites = result['Value']
            # Sorted in place, so the returned result carries the sorted list
            sites.sort()
            for site in sites:
                # Single-argument parenthesized form: same output with the
                # Python 2 print statement and the Python 3 print function
                print(site)

        return result
示例#14
0
    def getBannedSites(self, printOutput=False):
        """Retrieve the current list of sites unusable for 'ComputingAccess'.

        Example usage:

        >>> print(diracAdmin.getBannedSites())
        {'OK': True, 'Value': []}

        :param bool printOutput: when True, the sorted site names are printed
                                 one per line
        :returns: S_OK(sorted list of banned sites), or the failed result of
                  SiteStatus().getUnusableSites (its message is logged as a
                  warning)

        """
        result = SiteStatus().getUnusableSites('ComputingAccess')
        if not result['OK']:
            self.log.warn(result['Message'])
            return result

        bannedSites = result['Value']
        bannedSites.sort()
        if printOutput:
            # Single-argument parenthesized form: same output with the
            # Python 2 print statement and the Python 3 print function
            print('\n'.join(bannedSites))
        return S_OK(bannedSites)
示例#15
0
 def initialize(self):
     """Create the operations helper, the limiter built on it and the
     SiteStatus client.
     """
     opsHelper = self.__getOpsHelper()
     self.__opsHelper = opsHelper
     # The limiter works on the same operations helper
     self.__limiter = Limiter(opsHelper)
     self.__siteStatus = SiteStatus()
示例#16
0
def main():
    """Report which queues match the job described by a JDL file.

    The JDL is read from the file given as first positional argument. The
    queues available to the VO of the current proxy (restricted to the sites
    given with -S/--site, if any) are resolved and, for each of them, a table
    row is logged with its status, whether the job matches and, when it does
    not, the reason. The script exits with -1 on any failure to obtain the
    VO, the queues, the site mask or the match data.
    """
    global fullMatch
    global sites
    Script.registerSwitch("F", "full-match", "Check all the matching criteria",
                          setFullMatch)
    Script.registerSwitch(
        "S:", "site=", "Check matching for these sites (comma separated list)",
        setSites)
    Script.registerArgument("job_JDL: file with job JDL description")
    _, args = Script.parseCommandLine(ignoreErrors=True)

    # DIRAC modules are imported only after the command line is parsed
    from DIRAC.Core.Security.ProxyInfo import getVOfromProxyGroup
    from DIRAC.ConfigurationSystem.Client.Helpers import Resources
    from DIRAC.Core.Utilities.PrettyPrint import printTable
    from DIRAC.ResourceStatusSystem.Client.ResourceStatus import ResourceStatus
    from DIRAC.ResourceStatusSystem.Client.SiteStatus import SiteStatus
    from DIRAC.WorkloadManagementSystem.Utilities.QueueUtilities import getQueuesResolved, matchQueue

    with open(args[0]) as f:
        jdl = f.read()

    # Get the current VO
    result = getVOfromProxyGroup()
    if not result["OK"]:
        gLogger.error("No proxy found, please login")
        DIRACExit(-1)
    voName = result["Value"]

    resultQueues = Resources.getQueues(siteList=sites, community=voName)
    if not resultQueues["OK"]:
        gLogger.error("Failed to get CE information")
        DIRACExit(-1)
    siteDict = resultQueues["Value"]
    result = getQueuesResolved(siteDict, {}, checkPlatform=True)
    # Check the outcome of the resolution itself; the previous code tested
    # resultQueues again, so a failure here went undetected
    if not result["OK"]:
        gLogger.error("Failed to resolve queue information", result["Message"])
        DIRACExit(-1)
    queueDict = result["Value"]

    # get list of usable sites within this cycle
    resultMask = SiteStatus().getUsableSites()
    if not resultMask["OK"]:
        gLogger.error("Failed to get Site mask information")
        DIRACExit(-1)
    siteMaskList = resultMask.get("Value", [])

    rssClient = ResourceStatus()

    fields = ("Site", "CE", "Queue", "Status", "Match", "Reason")
    records = []

    for queue, queueInfo in queueDict.items():
        site = queueInfo["Site"]
        ce = queueInfo["CEName"]
        siteStatus = "Active" if site in siteMaskList else "InActive"
        # The CE inherits the site status unless RSS is enabled and provides
        # a status of its own
        ceStatus = siteStatus
        if rssClient.rssFlag:
            result = rssClient.getElementStatus(ce, "ComputingElement")
            if result["OK"]:
                ceStatus = result["Value"][ce]["all"]

        result = matchQueue(jdl,
                            queueInfo["ParametersDict"],
                            fullMatch=fullMatch)
        if not result["OK"]:
            gLogger.error("Failed in getting match data", result["Message"])
            DIRACExit(-1)
        # A queue is reported as Active only when both site and CE are Active
        status = "Active" if siteStatus == "Active" and ceStatus == "Active" else "Inactive"
        if result["Value"]["Match"]:
            records.append(
                (site, ce, queueInfo["QueueName"], status, "Yes", ""))
        else:
            records.append((site, ce, queueInfo["QueueName"], status, "No",
                            result["Value"]["Reason"]))

    gLogger.notice(
        printTable(fields,
                   records,
                   sortField="Site",
                   columnSeparator="  ",
                   printOut=False))
示例#17
0
    def getPilotSummaryWeb(self, selectDict, sortList, startItem, maxItems):
        """ Get summary of the pilot jobs status by CE/site in a standard structure

        Counters are collected with getCounters for four selections: the
        requested one, Aborted pilots of the last hour, Aborted/Done pilots
        of the last day and Done pilots of the last day with CurrentJobID 0.
        They are combined into one record per CE, or one 'Multiple' record
        per site with several CEs when no site is expanded.

        :param dict selectDict: selection conditions passed to getCounters.
            The keys 'LastUpdateTime', 'GridSite', 'Status' and 'ExpandSite'
            are interpreted here and not passed on as conditions. The
            dictionary given by the caller is not modified.
        :param sortList: not used by this method
        :param int startItem: index of the first record to return, used only
            when maxItems is non-zero
        :param int maxItems: maximum number of records to return, 0 for all
        :return: S_OK(dict) with the keys 'TotalRecords', 'ParameterNames',
            'Records' and 'Extras' (totals over all sites), or the failed
            getCounters result
        """

        stateNames = [
            'Submitted', 'Ready', 'Scheduled', 'Waiting', 'Running', 'Done',
            'Aborted', 'Failed'
        ]
        allStateNames = stateNames + ['Done_Empty', 'Aborted_Hour']
        paramNames = ['Site', 'CE'] + allStateNames
        # A record is laid out as paramNames + [Total, PilotsPerJob,
        # PilotJobEff, Status]. The position of the Status column is derived
        # from that layout: a hard-coded index silently points to another
        # column as soon as the list of states changes.
        statusIndex = len(paramNames) + 3

        def summaryColumns(done, empty, aborted, total, minTotal):
            """Return the [PilotsPerJob, PilotJobEff, Status] columns."""
            # float() keeps the ratios fractional even where "/" is an
            # integer division
            if (done - empty) > 0:
                pilotsPerJob = float(done) / (done - empty)
            elif done == 0:
                pilotsPerJob = 0.
            elif empty == done:
                pilotsPerJob = 99.
            else:
                pilotsPerJob = 0.

            if total > 0:
                jobEff = float(total - aborted) / total * 100
            else:
                jobEff = 100.

            # The quality is only evaluated above a minimum number of pilots
            if total > minTotal:
                if jobEff < 25.:
                    quality = 'Bad'
                elif jobEff < 60.:
                    quality = 'Poor'
                elif jobEff < 85.:
                    quality = 'Fair'
                else:
                    quality = 'Good'
            else:
                quality = 'Idle'
            return ['%.2f' % pilotsPerJob, '%.2f' % jobEff, quality]

        # Work on a copy: keys are removed and added below and the caller's
        # dictionary must not change as a side effect
        selectDict = dict(selectDict)

        last_update = selectDict.pop('LastUpdateTime', None)

        site_select = selectDict.pop('GridSite', [])
        if not isinstance(site_select, list):
            site_select = [site_select]

        status_select = selectDict.pop('Status', [])
        if not isinstance(status_select, list):
            status_select = [status_select]

        expand_site = ''
        if 'ExpandSite' in selectDict:
            expand_site = selectDict.pop('ExpandSite')
            site_select = [expand_site]

        def getStatusCounters(newer):
            """Count pilots per site/CE/status for the current selectDict."""
            return self.getCounters('PilotAgents',
                                    ['GridSite', 'DestinationSite', 'Status'],
                                    selectDict,
                                    newer=newer,
                                    timeStamp='LastUpdateTime')

        # Get all the data from the database with various selections
        result = getStatusCounters(last_update)
        if not result['OK']:
            return result

        selectDict['Status'] = 'Aborted'
        resultHour = getStatusCounters(Time.dateTime() - Time.hour)
        if not resultHour['OK']:
            return resultHour

        lastDay = Time.dateTime() - Time.day
        selectDict['Status'] = ['Aborted', 'Done']
        resultDay = getStatusCounters(lastDay)
        if not resultDay['OK']:
            return resultDay

        selectDict['CurrentJobID'] = 0
        selectDict['Status'] = 'Done'
        resultDayEmpty = getStatusCounters(lastDay)
        if not resultDayEmpty['OK']:
            return resultDayEmpty

        ceMap = {}
        resMap = getCESiteMapping()
        if resMap['OK']:
            ceMap = resMap['Value']

        def resolveSite(attDict):
            """Return the site of a counter row, mapping 'Unknown' sites
            through the CE when the CE is known to the CE-site mapping."""
            site = attDict['GridSite']
            ce = attDict['DestinationSite']
            if site == 'Unknown' and ce not in ('Unknown', 'Multiple') and ce in ceMap:
                site = ceMap[ce]
            return site

        # Sort out different counters
        resultDict = {'Unknown': {}}
        for attDict, count in result['Value']:
            siteCEs = resultDict.setdefault(resolveSite(attDict), {})
            ce = attDict['DestinationSite']
            if ce not in siteCEs:
                siteCEs[ce] = dict.fromkeys(allStateNames, 0)
            siteCEs[ce][attDict['Status']] = count

        def counterEntry(attDict):
            """Return the counters of the site/CE of a row, or None when the
            main selection produced no entry for it."""
            siteCEs = resultDict.get(resolveSite(attDict), {})
            return siteCEs.get(attDict['DestinationSite'])

        # The time-window queries only refine entries created by the main
        # selection; rows for a site/CE it did not return are skipped
        # instead of raising a KeyError.
        for attDict, count in resultDay['Value']:
            counters = counterEntry(attDict)
            if counters is not None and attDict['Status'] in ('Done', 'Aborted'):
                counters[attDict['Status']] = count

        for attDict, count in resultDayEmpty['Value']:
            counters = counterEntry(attDict)
            if counters is not None and attDict['Status'] == 'Done':
                counters['Done_Empty'] = count

        for attDict, count in resultHour['Value']:
            counters = counterEntry(attDict)
            if counters is not None and attDict['Status'] == 'Aborted':
                counters['Aborted_Hour'] = count

        records = []
        siteSumDict = {}
        for site, siteCEs in resultDict.items():
            sumDict = dict.fromkeys(allStateNames, 0)
            sumDict['Total'] = 0
            for ce, counters in siteCEs.items():
                itemList = [site, ce]
                total = 0
                for state in allStateNames:
                    itemList.append(counters[state])
                    sumDict[state] += counters[state]
                    # The two derived counters are not part of the total
                    if state not in ('Aborted_Hour', 'Done_Empty'):
                        total += counters[state]

                sumDict['Total'] += total
                # Add the total number of pilots seen in the last day
                itemList.append(total)
                # Add the efficiency evaluations and the quality of the CE
                itemList.extend(
                    summaryColumns(counters['Done'], counters['Done_Empty'],
                                   counters['Aborted'], total, 10))

                if len(siteCEs) == 1 or expand_site:
                    records.append(itemList)

            # Sites with several CEs are summarized in one 'Multiple' record
            # unless a site expansion was requested
            if len(siteCEs) > 1 and not expand_site:
                itemList = [site, 'Multiple']
                itemList.extend(sumDict[state]
                                for state in allStateNames + ['Total'])
                itemList.extend(
                    summaryColumns(sumDict['Done'], sumDict['Done_Empty'],
                                   sumDict['Aborted'], sumDict['Total'], 10))
                records.append(itemList)

            for state in allStateNames + ['Total']:
                siteSumDict[state] = siteSumDict.get(state, 0) + sumDict[state]

        # Perform site selection
        if site_select:
            records = [r for r in records if r[0] in site_select]

        # Perform status selection
        if status_select:
            records = [r for r in records if r[statusIndex] in status_select]

        # Get the Site Mask data
        result = SiteStatus().getUsableSites()
        if result['OK']:
            siteMask = result['Value']
            for r in records:
                r.append('Yes' if r[0] in siteMask else 'No')
        else:
            for r in records:
                r.append('Unknown')

        finalDict = {}
        finalDict['TotalRecords'] = len(records)
        finalDict['ParameterNames'] = paramNames + \
            ['Total', 'PilotsPerJob', 'PilotJobEff', 'Status', 'InMask']

        # Return all the records if maxItems == 0 or the specified number otherwise
        if maxItems:
            finalDict['Records'] = records[startItem:startItem + maxItems]
        else:
            finalDict['Records'] = records

        # Overall evaluation over all sites, with a higher threshold for the
        # quality status
        pilotsPerJob, jobEff, quality = summaryColumns(
            siteSumDict['Done'], siteSumDict['Done_Empty'],
            siteSumDict['Aborted'], siteSumDict['Total'], 100)
        siteSumDict['PilotsPerJob'] = pilotsPerJob
        siteSumDict['PilotJobEff'] = jobEff
        siteSumDict['Status'] = quality
        finalDict['Extras'] = siteSumDict

        return S_OK(finalDict)
示例#18
0
    def getPilotSummaryWeb(self, selectDict, sortList, startItem, maxItems):
        """Get summary of the pilot jobs status by CE/site in a standard structure"""
        allStateNames = PilotStatus.PILOT_STATES + [
            "Done_Empty", "Aborted_Hour"
        ]
        paramNames = ["Site", "CE"] + allStateNames

        last_update = None
        if "LastUpdateTime" in selectDict:
            last_update = selectDict["LastUpdateTime"]
            del selectDict["LastUpdateTime"]
        site_select = []
        if "GridSite" in selectDict:
            site_select = selectDict["GridSite"]
            if not isinstance(site_select, list):
                site_select = [site_select]
            del selectDict["GridSite"]

        status_select = []
        if "Status" in selectDict:
            status_select = selectDict["Status"]
            if not isinstance(status_select, list):
                status_select = [status_select]
            del selectDict["Status"]

        expand_site = ""
        if "ExpandSite" in selectDict:
            expand_site = selectDict["ExpandSite"]
            site_select = [expand_site]
            del selectDict["ExpandSite"]

        # Get all the data from the database with various selections
        result = self.getCounters(
            "PilotAgents",
            ["GridSite", "DestinationSite", "Status"],
            selectDict,
            newer=last_update,
            timeStamp="LastUpdateTime",
        )
        if not result["OK"]:
            return result

        last_update = Time.dateTime() - Time.hour
        selectDict["Status"] = PilotStatus.ABORTED
        resultHour = self.getCounters(
            "PilotAgents",
            ["GridSite", "DestinationSite", "Status"],
            selectDict,
            newer=last_update,
            timeStamp="LastUpdateTime",
        )
        if not resultHour["OK"]:
            return resultHour

        last_update = Time.dateTime() - Time.day
        selectDict["Status"] = [PilotStatus.ABORTED, PilotStatus.DONE]
        resultDay = self.getCounters(
            "PilotAgents",
            ["GridSite", "DestinationSite", "Status"],
            selectDict,
            newer=last_update,
            timeStamp="LastUpdateTime",
        )
        if not resultDay["OK"]:
            return resultDay
        selectDict["CurrentJobID"] = 0
        selectDict["Status"] = PilotStatus.DONE
        resultDayEmpty = self.getCounters(
            "PilotAgents",
            ["GridSite", "DestinationSite", "Status"],
            selectDict,
            newer=last_update,
            timeStamp="LastUpdateTime",
        )
        if not resultDayEmpty["OK"]:
            return resultDayEmpty

        ceMap = {}
        resMap = getCESiteMapping()
        if resMap["OK"]:
            ceMap = resMap["Value"]

        # Sort out different counters
        resultDict = {}
        resultDict["Unknown"] = {}
        for attDict, count in result["Value"]:
            site = attDict["GridSite"]
            ce = attDict["DestinationSite"]
            state = attDict["Status"]
            if site == "Unknown" and ce != "Unknown" and ce != "Multiple" and ce in ceMap:
                site = ceMap[ce]
            if site not in resultDict:
                resultDict[site] = {}
            if ce not in resultDict[site]:
                resultDict[site][ce] = {}
                for p in allStateNames:
                    resultDict[site][ce][p] = 0

            resultDict[site][ce][state] = count

        for attDict, count in resultDay["Value"]:
            site = attDict["GridSite"]
            ce = attDict["DestinationSite"]
            state = attDict["Status"]
            if site == "Unknown" and ce != "Unknown" and ce in ceMap:
                site = ceMap[ce]
            if state == PilotStatus.DONE:
                resultDict[site][ce][PilotStatus.DONE] = count
            if state == PilotStatus.ABORTED:
                resultDict[site][ce][PilotStatus.ABORTED] = count

        for attDict, count in resultDayEmpty["Value"]:
            site = attDict["GridSite"]
            ce = attDict["DestinationSite"]
            state = attDict["Status"]
            if site == "Unknown" and ce != "Unknown" and ce in ceMap:
                site = ceMap[ce]
            if state == PilotStatus.DONE:
                resultDict[site][ce]["Done_Empty"] = count

        for attDict, count in resultHour["Value"]:
            site = attDict["GridSite"]
            ce = attDict["DestinationSite"]
            state = attDict["Status"]
            if site == "Unknown" and ce != "Unknown" and ce in ceMap:
                site = ceMap[ce]
            if state == PilotStatus.ABORTED:
                resultDict[site][ce]["Aborted_Hour"] = count

        records = []
        siteSumDict = {}
        for site in resultDict:
            sumDict = {}
            for state in allStateNames:
                if state not in sumDict:
                    sumDict[state] = 0
            sumDict["Total"] = 0
            for ce in resultDict[site]:
                itemList = [site, ce]
                total = 0
                for state in allStateNames:
                    itemList.append(resultDict[site][ce][state])
                    sumDict[state] += resultDict[site][ce][state]
                    if state == PilotStatus.DONE:
                        done = resultDict[site][ce][state]
                    if state == "Done_Empty":
                        empty = resultDict[site][ce][state]
                    if state == PilotStatus.ABORTED:
                        aborted = resultDict[site][ce][state]
                    if state != "Aborted_Hour" and state != "Done_Empty":
                        total += resultDict[site][ce][state]

                sumDict["Total"] += total
                # Add the total number of pilots seen in the last day
                itemList.append(total)
                # Add pilot submission efficiency evaluation
                if (done - empty) > 0:
                    eff = done / (done - empty)
                elif done == 0:
                    eff = 0.0
                elif empty == done:
                    eff = 99.0
                else:
                    eff = 0.0
                itemList.append("%.2f" % eff)
                # Add pilot job efficiency evaluation
                if total > 0:
                    eff = (total - aborted) / total * 100
                else:
                    eff = 100.0
                itemList.append("%.2f" % eff)

                # Evaluate the quality status of the CE
                if total > 10:
                    if eff < 25.0:
                        itemList.append("Bad")
                    elif eff < 60.0:
                        itemList.append("Poor")
                    elif eff < 85.0:
                        itemList.append("Fair")
                    else:
                        itemList.append("Good")
                else:
                    itemList.append("Idle")

                if len(resultDict[site]) == 1 or expand_site:
                    records.append(itemList)

            if len(resultDict[site]) > 1 and not expand_site:
                itemList = [site, "Multiple"]
                for state in allStateNames + ["Total"]:
                    if state in sumDict:
                        itemList.append(sumDict[state])
                    else:
                        itemList.append(0)
                done = sumDict[PilotStatus.DONE]
                empty = sumDict["Done_Empty"]
                aborted = sumDict[PilotStatus.ABORTED]
                total = sumDict["Total"]

                # Add pilot submission efficiency evaluation
                if (done - empty) > 0:
                    eff = done / (done - empty)
                elif done == 0:
                    eff = 0.0
                elif empty == done:
                    eff = 99.0
                else:
                    eff = 0.0
                itemList.append("%.2f" % eff)
                # Add pilot job efficiency evaluation
                if total > 0:
                    eff = (total - aborted) / total * 100
                else:
                    eff = 100.0
                itemList.append("%.2f" % eff)

                # Evaluate the quality status of the Site
                if total > 10:
                    if eff < 25.0:
                        itemList.append("Bad")
                    elif eff < 60.0:
                        itemList.append("Poor")
                    elif eff < 85.0:
                        itemList.append("Fair")
                    else:
                        itemList.append("Good")
                else:
                    itemList.append("Idle")
                records.append(itemList)

            for state in allStateNames + ["Total"]:
                if state not in siteSumDict:
                    siteSumDict[state] = sumDict[state]
                else:
                    siteSumDict[state] += sumDict[state]

        # Perform site selection
        if site_select:
            new_records = []
            for r in records:
                if r[0] in site_select:
                    new_records.append(r)
            records = new_records

        # Perform status selection
        if status_select:
            new_records = []
            for r in records:
                if r[14] in status_select:
                    new_records.append(r)
            records = new_records

        # Get the Site Mask data
        result = SiteStatus().getUsableSites()
        if result["OK"]:
            siteMask = result["Value"]
            for r in records:
                if r[0] in siteMask:
                    r.append("Yes")
                else:
                    r.append("No")
        else:
            for r in records:
                r.append("Unknown")

        finalDict = {}
        finalDict["TotalRecords"] = len(records)
        finalDict["ParameterNames"] = paramNames + [
            "Total", "PilotsPerJob", "PilotJobEff", "Status", "InMask"
        ]

        # Return all the records if maxItems == 0 or the specified number otherwise
        if maxItems:
            finalDict["Records"] = records[startItem:startItem + maxItems]
        else:
            finalDict["Records"] = records

        done = siteSumDict[PilotStatus.DONE]
        empty = siteSumDict["Done_Empty"]
        aborted = siteSumDict[PilotStatus.ABORTED]
        total = siteSumDict["Total"]

        # Add pilot submission efficiency evaluation
        if (done - empty) > 0:
            eff = done / (done - empty)
        elif done == 0:
            eff = 0.0
        elif empty == done:
            eff = 99.0
        else:
            eff = 0.0
        siteSumDict["PilotsPerJob"] = "%.2f" % eff
        # Add pilot job efficiency evaluation
        if total > 0:
            eff = (total - aborted) / total * 100
        else:
            eff = 100.0
        siteSumDict["PilotJobEff"] = "%.2f" % eff

        # Evaluate the overall quality status
        if total > 100:
            if eff < 25.0:
                siteSumDict["Status"] = "Bad"
            elif eff < 60.0:
                siteSumDict["Status"] = "Poor"
            elif eff < 85.0:
                siteSumDict["Status"] = "Fair"
            else:
                siteSumDict["Status"] = "Good"
        else:
            siteSumDict["Status"] = "Idle"
        finalDict["Extras"] = siteSumDict

        return S_OK(finalDict)
示例#19
0
        DIRACExit(-1)
    voName = result['Value']

    resultQueues = Resources.getQueues(siteList=sites, community=voName)
    if not resultQueues['OK']:
        gLogger.error('Failed to get CE information')
        DIRACExit(-1)
    siteDict = resultQueues['Value']
    result = getQueuesResolved(siteDict)
    if not resultQueues['OK']:
        gLogger.error('Failed to get CE information')
        DIRACExit(-1)
    queueDict = result['Value']

    # get list of usable sites within this cycle
    resultMask = SiteStatus().getUsableSites()
    if not resultMask['OK']:
        gLogger.error('Failed to get Site mask information')
        DIRACExit(-1)
    siteMaskList = resultMask.get('Value', [])

    rssClient = ResourceStatus()

    fields = ('Site', 'CE', 'Queue', 'Status', 'Match', 'Reason')
    records = []

    for queue, queueInfo in queueDict.iteritems():
        site = queueInfo['Site']
        ce = queueInfo['CEName']
        siteStatus = "Active" if site in siteMaskList else "InActive"
        ceStatus = siteStatus
示例#20
0
def main():
    """Check which computing queues match a job description and print a table.

    The first positional command line argument is the path of a file holding
    the job description (JDL). The ``-F/--full-match`` and ``-S/--site``
    switches are registered here; their callbacks (``setFullMatch`` and
    ``setSites``) are defined elsewhere in the file and presumably set the
    module level ``fullMatch`` and ``sites`` values read below.

    For every resolved queue one record
    ``(Site, CE, Queue, Status, Match, Reason)`` is produced, where ``Status``
    is ``Active`` only if both the site and the CE are active.

    The script exits through ``DIRACExit(-1)`` when the proxy VO, the queue
    description, the queue resolution, the site mask or the matching itself
    cannot be obtained.
    """
    global fullMatch
    global sites
    Script.registerSwitch("F", "full-match", "Check all the matching criteria",
                          setFullMatch)
    Script.registerSwitch(
        "S:", "site=", "Check matching for these sites (comma separated list)",
        setSites)

    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()

    if len(args) == 0:
        gLogger.error("Error: No job description provided")
        Script.showHelp(exitCode=1)

    # DIRAC modules are imported only after the command line has been parsed.
    from DIRAC.Core.Security.ProxyInfo import getVOfromProxyGroup
    from DIRAC.ConfigurationSystem.Client.Helpers import Resources
    from DIRAC.Core.Utilities.PrettyPrint import printTable
    from DIRAC.ResourceStatusSystem.Client.ResourceStatus import ResourceStatus
    from DIRAC.ResourceStatusSystem.Client.SiteStatus import SiteStatus
    from DIRAC.WorkloadManagementSystem.Utilities.QueueUtilities import getQueuesResolved, matchQueue

    with open(args[0]) as f:
        jdl = f.read()

    # Get the current VO
    result = getVOfromProxyGroup()
    if not result['OK']:
        gLogger.error('No proxy found, please login')
        DIRACExit(-1)
    voName = result['Value']

    resultQueues = Resources.getQueues(siteList=sites, community=voName)
    if not resultQueues['OK']:
        gLogger.error('Failed to get CE information')
        DIRACExit(-1)
    siteDict = resultQueues['Value']

    result = getQueuesResolved(siteDict)
    # Test the result of the resolution itself. The previous code re-tested
    # resultQueues here, so a failed resolution went unnoticed and the
    # following result['Value'] access was done on an error structure.
    if not result['OK']:
        gLogger.error('Failed to get CE information', result['Message'])
        DIRACExit(-1)
    queueDict = result['Value']

    # get list of usable sites within this cycle
    resultMask = SiteStatus().getUsableSites()
    if not resultMask['OK']:
        gLogger.error('Failed to get Site mask information')
        DIRACExit(-1)
    siteMaskList = resultMask.get('Value', [])

    rssClient = ResourceStatus()

    fields = ('Site', 'CE', 'Queue', 'Status', 'Match', 'Reason')
    records = []

    for queue, queueInfo in queueDict.items():
        site = queueInfo['Site']
        ce = queueInfo['CEName']
        siteStatus = "Active" if site in siteMaskList else "InActive"
        # Without RSS information the CE simply inherits the site status.
        ceStatus = siteStatus
        if rssClient.rssFlag:
            result = rssClient.getElementStatus(ce, "ComputingElement")
            if result['OK']:
                ceStatus = result['Value'][ce]['all']

        result = matchQueue(jdl, queueInfo, fullMatch=fullMatch)
        if not result['OK']:
            gLogger.error('Failed in getting match data', result['Message'])
            DIRACExit(-1)
        # The queue is reported as usable only if site and CE are both active.
        status = "Active" if siteStatus == "Active" and ceStatus == "Active" else "Inactive"
        if result['Value']['Match']:
            records.append((site, ce, queueInfo['Queue'], status, 'Yes', ''))
        else:
            records.append((site, ce, queueInfo['Queue'], status, 'No',
                            result['Value']['Reason']))

    gLogger.notice(
        printTable(fields,
                   records,
                   sortField='Site',
                   columnSeparator='  ',
                   printOut=False))
示例#21
0
 def initializeOptimizer(cls):
     """ Initialization of the optimizer.

     Creates a SiteStatus client and a JobDB object and stores them on ``cls``
     as ``siteClient`` and ``__jobDB`` (the latter is name-mangled when this
     function is defined inside a class body).
     NOTE(review): takes ``cls``, so it is presumably decorated as a
     classmethod outside of this view - confirm.

     :return: S_OK()
     """
     cls.siteClient = SiteStatus()
     cls.__jobDB = JobDB()
     return S_OK()
示例#22
0
 def initialize(self):
     """ Create the SiteStatus client and keep it as ``self.siteClient``.

     :return: S_OK()
     """
     self.siteClient = SiteStatus()
     return S_OK()
示例#23
0
    def checkJob(self, job, classAdJob):
        """Decide what to do with a job: hold it, request staging or queue it.

        Steps, in order:

        1. If the job was rescheduled and the applicable delay has not yet
           elapsed, keep it on hold.
        2. Combine the sites requested / banned by the job with the usable and
           unusable sites reported by SiteStatus.
        3. Without input data the job is passed to the task queue directly.
        4. With input data, the site candidates stored in the optimizer info
           are filtered, staging is resolved and either a staging request is
           set or the job is passed to the task queue.

        :param job: job identifier handed to the JobDB calls
            (presumably the integer job ID - confirm against the caller)
        :param classAdJob: job description object; only
            ``getAttributeString`` is called on it here, otherwise it is
            forwarded to private helpers
        :return: S_OK() when the job is put on hold or a staging request was
            set, the result of ``__sendJobToTaskQueue`` when the job is
            queued, or an error structure (S_ERROR / failed helper result)
        """
        self.log.verbose('Job %s will be processed' % (job))

        # Check if the job was recently rescheduled
        result = self.jobDB.getJobAttributes(
            job, ['RescheduleCounter', 'RescheduleTime', 'ApplicationStatus'])
        if not result['OK']:
            self.log.error(result['Message'])
            return S_ERROR('Can not get job attributes from JobDB')
        jobDict = result['Value']
        reCounter = int(jobDict['RescheduleCounter'])
        if reCounter != 0:
            reTime = fromString(jobDict['RescheduleTime'])
            # Time elapsed since the last rescheduling
            delta = toEpoch() - toEpoch(reTime)
            # The n-th rescheduling uses the n-th entry of rescheduleDelaysList;
            # beyond the end of that list maxRescheduleDelay applies.
            delay = self.maxRescheduleDelay
            if reCounter <= len(self.rescheduleDelaysList):
                delay = self.rescheduleDelaysList[reCounter - 1]
            if delta < delay:
                # Only update the application status if it does not already
                # carry the "On Hold" marker.
                if jobDict['ApplicationStatus'].find(
                        'On Hold: after rescheduling') == -1:
                    result = self.jobDB.setJobStatus(
                        job,
                        application='On Hold: after rescheduling #%d' %
                        reCounter)
                return S_OK()

        # First, get Site and BannedSites from the Job

        # NOTE(review): the helper result is indexed directly, without an
        # 'OK' check - confirm the helper cannot fail.
        result = self.__getJobSiteRequirement(job, classAdJob)
        userBannedSites = result['BannedSites']
        userSites = result['Sites']

        if userSites:
            # Requested sites that are also banned by the user are dropped;
            # nothing left means the request can never be satisfied.
            userSites = applySiteRequirements(userSites, [], userBannedSites)
            if not userSites:
                msg = 'Impossible Site Requirement'
                return S_ERROR(msg)

        # Second, get the Active and Banned sites from the RSS

        siteStatus = SiteStatus()

        usableSites = siteStatus.getUsableSites('ComputingAccess')
        unusableSites = siteStatus.getUnusableSites('ComputingAccess')

        if not (usableSites['OK'] and unusableSites['OK']):
            if not usableSites['OK']:
                self.log.error(usableSites['Message'])
            if not unusableSites['OK']:
                self.log.error(unusableSites['Message'])
            # The message mentions JobDB although the lists come from SiteStatus
            return S_ERROR('Can not get Active and Banned Sites from JobDB')

        usableSites = usableSites['Value']
        unusableSites = unusableSites['Value']

        if userSites:
            sites = applySiteRequirements(userSites, usableSites,
                                          unusableSites)
            if not sites:
                # Put on Hold only non-excluded job types
                # (jobs of an excluded type fall through and are processed
                # further even though none of their sites is usable)
                jobType = classAdJob.getAttributeString('JobType')
                if not jobType in self.excludedOnHoldJobTypes:
                    msg = 'On Hold: Requested site is Banned or not Active'
                    self.log.info(msg)
                    result = self.jobDB.setJobStatus(job, application=msg)
                    return S_OK()

        # Third, check if there is input data
        result = self.jobDB.getInputData(job)
        if not result['OK']:
            self.log.warn('Failed to get input data from JobDB for %s' % (job))
            self.log.error(result['Message'])
            return S_ERROR('Failed to get input data from JobDB')

        if not result['Value']:
            return self.__sendJobToTaskQueue(job, classAdJob, userSites,
                                             userBannedSites)

        # Keep only the non-empty LFN entries
        hasInputData = False
        inputData = []
        for lfn in result['Value']:
            if lfn:
                inputData.append(lfn)
                hasInputData = True

        if not hasInputData:
            #With no input data requirement, job can proceed directly to task queue
            self.log.verbose('Job %s has no input data requirement' % (job))
            return self.__sendJobToTaskQueue(job, classAdJob, userSites,
                                             userBannedSites)

        self.log.verbose('Job %s has an input data requirement ' % (job))

        # Fourth, Check all optimizer information
        result = self.__checkOptimizerInfo(job)
        if not result['OK']:
            return result

        optInfo = result['Value']

        #Compare site candidates with current mask
        optSites = optInfo['SiteCandidates'].keys()
        self.log.info('Input Data Site Candidates: %s' % (', '.join(optSites)))
        # Check that it is compatible with user requirements
        optSites = applySiteRequirements(optSites, userSites, userBannedSites)
        if not optSites:
            msg = 'Impossible Site + InputData Requirement'
            return S_ERROR(msg)

        # Now restrict the candidates to the sites usable right now
        sites = applySiteRequirements(optSites, usableSites, unusableSites)
        if not sites:
            msg = 'On Hold: InputData Site is Banned or not Active'
            self.log.info(msg)
            result = self.jobDB.setJobStatus(job, application=msg)
            return S_OK()

        #Set stager request as necessary, optimize for smallest #files on tape if
        #more than one site candidate left at this point
        checkStaging = self.__resolveSitesForStaging(job, sites, inputData,
                                                     optInfo['SiteCandidates'])
        if not checkStaging['OK']:
            return checkStaging

        # The helper result carries the destination sites under 'SiteCandidates'
        # and the staging flag under 'Value'.
        destinationSites = checkStaging['SiteCandidates']
        if not destinationSites:
            return S_ERROR('No destination sites available')

        stagingFlag = checkStaging['Value']
        if stagingFlag:
            #Single site candidate chosen and staging required
            self.log.verbose('Job %s requires staging of input data' % (job))
            # set all LFN to disk for the selected site
            stagingSite = destinationSites[0]
            siteDict = optInfo['SiteCandidates'][stagingSite]
            siteDict['disk'] = siteDict['disk'] + siteDict['tape']
            siteDict['tape'] = 0

            optInfo['SiteCandidates'][stagingSite] = siteDict
            self.log.verbose(
                'Updating %s Optimizer Info for Job %s:' %
                (self.dataAgentName, job), optInfo)
            result = self.setOptimizerJobInfo(job, self.dataAgentName, optInfo)
            if not result['OK']:
                return result

            # Site is selected for staging, report it
            self.log.verbose('Staging site candidate for job %s is %s' %
                             (job, stagingSite))

            # Fall back to the single staging site if the helper fails
            result = self.__getStagingSites(stagingSite, destinationSites)
            if not result['OK']:
                stagingSites = [stagingSite]
            else:
                stagingSites = result['Value']

            # Record the job 'Site' attribute: the site itself, the name of
            # the site group, or 'Multiple' when no group name is available.
            if len(stagingSites) == 1:
                self.jobDB.setJobAttribute(job, 'Site', stagingSite)
            else:
                # Get the name of the site group
                result = self.__getSiteGroup(stagingSites)
                if result['OK']:
                    groupName = result['Value']
                    if groupName:
                        self.jobDB.setJobAttribute(job, 'Site', groupName)
                    else:
                        self.jobDB.setJobAttribute(job, 'Site', 'Multiple')
                else:
                    self.jobDB.setJobAttribute(job, 'Site', 'Multiple')

            stagerDict = self.__setStagingRequest(job, stagingSite, optInfo)
            if not stagerDict['OK']:
                return stagerDict
            self.__updateOtherSites(job, stagingSite, stagerDict['Value'],
                                    optInfo)
            return S_OK()
        else:
            #No staging required, can proceed to task queue agent and then waiting status
            self.log.verbose('Job %s does not require staging of input data' %
                             (job))
        #Finally send job to TaskQueueAgent
        return self.__sendJobToTaskQueue(job, classAdJob, destinationSites,
                                         userBannedSites)
示例#24
0
  def optimizeJob( self, jid, jobState ):
    """ Decide whether the job is held, sent to the task queue or staged.

        Steps, in order: apply the reschedule delay, combine the job site
        requirements with the usable / unusable sites from SiteStatus, and,
        if the job has input data, filter the input data site candidates,
        resolve staging and either send the job to the task queue or request
        staging at one of the usable candidate sites.

        :param jid: not used in this method body
        :param jobState: object giving access to the job attributes, input
            data and (through private helpers) its status
        :return: the result of ``__holdJob``, ``__sendToTQ`` or
            ``_setJobSite``, or an error structure (S_ERROR / failed result)
    """
    # Reschedule delay
    result = jobState.getAttributes( [ 'RescheduleCounter', 'RescheduleTime', 'ApplicationStatus' ] )
    if not result[ 'OK' ]:
      return result
    attDict = result[ 'Value' ]
    try:
      reschedules = int( attDict[ 'RescheduleCounter' ] )
    except ValueError:
      return S_ERROR( "RescheduleCounter has to be an integer" )
    if reschedules != 0:
      delays = self.ex_getOption( 'RescheduleDelays', [60, 180, 300, 600] )
      # The index is capped at the last entry of the list.
      # NOTE(review): for reschedules >= 1 the index is at least 1, so the
      # first entry of the list is never selected - confirm this is intended.
      delay = delays[ min( reschedules, len( delays ) - 1 ) ]
      waited = toEpoch() - toEpoch( fromString( attDict[ 'RescheduleTime' ] ) )
      if waited < delay:
        return self.__holdJob( jobState, 'On Hold: after rescheduling %s' % reschedules, delay )

    # Get site requirements
    result = self._getSitesRequired( jobState )
    if not result[ 'OK' ]:
      return result
    userSites, userBannedSites = result[ 'Value' ]

    # Get active and banned sites from DIRAC
    # (the error messages below mention JobDB although SiteStatus is queried)
    siteStatus = SiteStatus()
    result = siteStatus.getUsableSites( 'ComputingAccess' )
    if not result[ 'OK' ]:
      return S_ERROR( "Cannot retrieve active sites from JobDB" )
    usableSites = result[ 'Value' ]
    result = siteStatus.getUnusableSites( 'ComputingAccess' )
    if not result[ 'OK' ]:
      return S_ERROR( "Cannot retrieve banned sites from JobDB" )
    unusableSites = result[ 'Value' ]

    # If the user has selected any site, filter them and hold the job if not able to run
    # (job types listed in ExcludedOnHoldJobTypes skip this check)
    if userSites:
      result = jobState.getAttribute( "JobType" )
      if not result[ 'OK' ]:
        return S_ERROR( "Could not retrieve job type" )
      jobType = result[ 'Value' ]
      if jobType not in self.ex_getOption( 'ExcludedOnHoldJobTypes', [] ):
        sites = self._applySiteFilter( userSites, usableSites, unusableSites )
        if not sites:
          return self.__holdJob( jobState, "Sites %s are inactive or banned" % ", ".join( userSites ) )

    # Get the Input data
    # Third, check if there is input data
    result = jobState.getInputData()
    if not result['OK']:
      self.jobLog.error( "Cannot get input data %s" % ( result['Message'] ) )
      return S_ERROR( 'Failed to get input data from JobDB' )

    if not result['Value']:
      # No input data? Generate requirements and next
      return self.__sendToTQ( jobState, userSites, userBannedSites )

    inputData = result[ 'Value' ]

    self.jobLog.verbose( 'Has an input data requirement' )
    # The site candidates are read from the parameters stored under the
    # name given by the InputDataAgent option.
    idAgent = self.ex_getOption( 'InputDataAgent', 'InputData' )
    result = self.retrieveOptimizerParam( idAgent )
    if not result['OK']:
      self.jobLog.error( "Could not retrieve input data info: %s" % result[ 'Message' ] )
      return S_ERROR( "File Catalog Access Failure" )
    opData = result[ 'Value' ]
    if 'SiteCandidates' not in opData:
      return S_ERROR( "No possible site candidates" )

    # Filter input data sites with user requirement
    siteCandidates = list( opData[ 'SiteCandidates' ] )
    self.jobLog.info( "Site candidates are %s" % siteCandidates )

    siteCandidates = self._applySiteFilter( siteCandidates, userSites, userBannedSites )
    if not siteCandidates:
      return S_ERROR( "Impossible InputData * Site requirements" )

    # Per-site replica information restricted to the remaining candidates
    idSites = {}
    for site in siteCandidates:
      idSites[ site ] = opData[ 'SiteCandidates' ][ site ]

    #Check if sites have correct count of disk+tape replicas
    numData = len( inputData )
    # Collected first and removed afterwards, so that idSites is not modified
    # while it is being iterated.
    errorSites = set()
    for site in idSites:
      if numData != idSites[ site ][ 'disk' ] + idSites[ site ][ 'tape' ]:
        self.jobLog.error( "Site candidate %s does not have all the input data" % site )
        errorSites.add( site )
    for site in errorSites:
      idSites.pop( site )
    if not idSites:
      return S_ERROR( "Site candidates do not have all the input data" )

    #Check if staging is required
    stageRequired, siteCandidates = self.__resolveStaging( jobState, inputData, idSites )
    if not siteCandidates:
      return S_ERROR( "No destination sites available" )

    # Is any site active?
    stageSites = self._applySiteFilter( siteCandidates, usableSites, unusableSites )
    if not stageSites:
      return self.__holdJob( jobState, "Sites %s are inactive or banned" % ", ".join( siteCandidates ) )

    # If no staging is required send to TQ
    if not stageRequired:
      # Use siteCandidates and not stageSites because active and banned sites
      # will be taken into account on matching time
      return self.__sendToTQ( jobState, siteCandidates, userBannedSites )

    # Check if the user is allowed to stage
    if self.ex_getOption( "RestrictDataStage", False ):
      if not self.__checkStageAllowed( jobState ):
        return S_ERROR( "Stage not allowed" )

    # Get stageSites[0] because it has already been randomized and it's as good as any in stageSites
    stageSite = stageSites[0]
    self.jobLog.verbose( " Staging site will be %s" % ( stageSite ) )
    stageData = idSites[ stageSite ]
    # Set as if everything has already been staged
    stageData[ 'disk' ] += stageData[ 'tape' ]
    stageData[ 'tape' ] = 0
    # Set the site info back to the original dict to save afterwards
    opData[ 'SiteCandidates' ][ stageSite ] = stageData

    result = self.__requestStaging( jobState, stageSite, opData )
    if not result[ 'OK' ]:
      return result
    stageLFNs = result[ 'Value' ]
    self._updateSharedSESites( stageSite, stageLFNs, opData )
    # Save the optimizer data again
    self.jobLog.verbose( 'Updating %s Optimizer Info:' % ( idAgent ), opData )
    result = self.storeOptimizerParam( idAgent, opData )
    if not result[ 'OK' ]:
      return result

    return self._setJobSite( jobState, stageSites )
示例#25
0
 def setUp(self):
   """ Create the clients used by this object.

   A ResourceStatusClient is stored as ``self.rsClient`` and a SiteStatus
   client as ``self.stClient``; ``rssFlag`` of the latter is forced to True.
   """
   self.rsClient = ResourceStatusClient()
   self.stClient = SiteStatus()
   self.stClient.rssFlag = True
示例#26
0
    def _resolveCECandidates(self, taskQueueDict):
        """
        Work out the CEs that can be used for the given TaskQueue.

        :param taskQueueDict: TaskQueue description. 'TaskQueueID' is read for
            logging; the optional 'GridCEs', 'BannedSites' and 'Sites' entries
            steer the selection.
        :return: the 'GridCEs' entry when it is set, otherwise the list of CE
            hosts found for the usable, non-banned (and, if given, requested)
            sites. An empty list is returned whenever the site mask or the
            eligible resources can not be obtained or nothing is left.
        """
        # An explicit list of CEs is trusted as it is and bypasses the
        # site mask (e.g. sam jobs).
        requestedCEs = taskQueueDict['GridCEs'] if 'GridCEs' in taskQueueDict else None
        if requestedCEs:
            self.log.info(
                'CEs requested by TaskQueue %s:' % taskQueueDict['TaskQueueID'],
                ', '.join(requestedCEs))
            return requestedCEs

        # Sites currently usable for computing
        maskResult = SiteStatus().getUsableSites('ComputingAccess')
        if not maskResult['OK']:
            self.log.error('Can not retrieve site Mask from DB:',
                           maskResult['Message'])
            return []

        usableSites = maskResult['Value']
        if not usableSites:
            self.log.error('Site mask is empty')
            return []

        self.log.verbose('Site Mask: %s' % ', '.join(usableSites))

        # Drop the sites banned by the TaskQueue (done in place on the list
        # obtained from the site mask).
        bannedSites = taskQueueDict['BannedSites'] if 'BannedSites' in taskQueueDict else []
        for bannedSite in bannedSites:
            if bannedSite not in usableSites:
                continue
            usableSites.remove(bannedSite)
            self.log.verbose('Removing banned site %s from site Mask' %
                             bannedSite)

        # When the TaskQueue names its sites, keep only those of them
        # that are still in the mask.
        if 'Sites' in taskQueueDict:
            wantedSites = taskQueueDict['Sites']
            siteMask = [name for name in usableSites if name in wantedSites]
        else:
            siteMask = list(usableSites)

        if not siteMask:
            # pilot can not be submitted
            self.log.info('No Valid Site Candidate in Mask for TaskQueue %s' %
                          taskQueueDict['TaskQueueID'])
            return []

        self.log.info(
            'Site Candidates for TaskQueue %s:' % taskQueueDict['TaskQueueID'],
            ', '.join(siteMask))

        # Look up the CEs attached to the selected sites
        resources = Resources(vo=self.virtualOrganization)
        selection = {
            'Site': siteMask,
            'SubmissionMode': 'gLite',
            'CEType': ['LCG', 'CREAM']
        }
        eligible = resources.getEligibleResources('Computing', selection)
        if not eligible['OK']:
            self.log.error("Failed to get eligible ce's:", eligible['Message'])
            return []

        # Keep the host of every CE for which one is defined
        ceMask = []
        for ceName in eligible['Value']:
            host = resources.getComputingElementValue(ceName, 'Host', 'unknown')
            if host == 'unknown':
                continue
            ceMask.append(host)

        if not ceMask:
            self.log.info(
                'No CE Candidate found for TaskQueue %s:' %
                taskQueueDict['TaskQueueID'], ', '.join(siteMask))

        self.log.verbose(
            'CE Candidates for TaskQueue %s:' % taskQueueDict['TaskQueueID'],
            ', '.join(ceMask))

        return ceMask