示例#1
0
class DiracAdmin(API):
    """ Administrative functionalities
  """

    #############################################################################
    def __init__(self):
        """Set up the DIRAC Admin API instance.

        Runs the parent initializer, then creates the helpers and flags
        that the other methods rely on: the CS API client, the debug
        flag, the scratch and current directories and the site status
        objects.
        """
        super(DiracAdmin, self).__init__()

        self.csAPI = CSAPI()

        # Debug mode is on when <section>/LogLevel reads 'DEBUG'.  'DEBUG' is
        # also passed as the second getValue argument (presumably the value
        # used when the option is not set -- confirm against gConfig).
        logLevel = gConfig.getValue(self.section + '/LogLevel', 'DEBUG')
        self.dbg = (logLevel == 'DEBUG')

        scratchOption = self.section + '/ScratchDir'
        self.scratchDir = gConfig.getValue(scratchOption, '/tmp')
        # Remember where we were started; used as default output directory.
        self.currentDir = os.getcwd()
        self.rssFlag = ResourceStatus().rssFlag
        self.sitestatus = SiteStatus()

    #############################################################################
    def uploadProxy(self, group):
        """Upload a proxy for the given DIRAC group through gProxyManager.

        Example usage:

          >>> print diracAdmin.uploadProxy('lhcb_pilot')
          {'OK': True, 'Value': 0L}

        :param group: DIRAC Group
        :type group: string
        :return: S_OK,S_ERROR (whatever gProxyManager.uploadProxy returns)
        """
        uploadResult = gProxyManager.uploadProxy(diracGroup=group)
        return uploadResult

    #############################################################################
    def setProxyPersistency(self, userDN, userGroup, persistent=True):
        """Set the persistency flag of a proxy in the Proxy Manager.

        Example usage:

          >>> print diracAdmin.setProxyPersistency( 'some DN', 'dirac group', True )
          {'OK': True }

        :param userDN: User DN
        :type userDN: string
        :param userGroup: DIRAC Group
        :type userGroup: string
        :param persistent: Persistent flag
        :type persistent: boolean
        :return: S_OK,S_ERROR (whatever gProxyManager.setPersistency returns)
        """
        persistencyResult = gProxyManager.setPersistency(userDN, userGroup,
                                                         persistent)
        return persistencyResult

    #############################################################################
    def checkProxyUploaded(self, userDN, userGroup, requiredTime):
        """Ask the Proxy Manager whether the user has a proxy uploaded.

        The call is forwarded unchanged to gProxyManager.userHasProxy.

        Example usage:

          >>> print diracAdmin.checkProxyUploaded( 'some DN', 'dirac group', 0 )
          {'OK': True, 'Value' : True/False }

        :param userDN: User DN
        :type userDN: string
        :param userGroup: DIRAC Group
        :type userGroup: string
        :param requiredTime: Required life time of the uploaded proxy
                             (presumably a number of seconds -- confirm)
        :return: S_OK,S_ERROR
        """
        hasProxy = gProxyManager.userHasProxy(userDN, userGroup, requiredTime)
        return hasProxy

    #############################################################################
    def getSiteMask(self, printOutput=False, status='Active'):
        """Get the sites that are currently in the given state.

        The list is obtained from self.sitestatus.getSites.

        Example usage:

          >>> print diracAdmin.getSiteMask()
          {'OK': True, 'Value': [...]}

        :param printOutput: when True, sort the site list in place and
                            print one site per line
        :type printOutput: boolean
        :param status: site state to select (default 'Active')
        :type status: string
        :return: S_OK,S_ERROR -- the getSites() result itself
        """
        result = self.sitestatus.getSites(siteState=status)
        if not result['OK']:
            return result

        siteList = result['Value']
        if printOutput:
            # In-place sort: the returned result carries the sorted list too.
            siteList.sort()
            for siteName in siteList:
                print(siteName)

        return result

    #############################################################################
    def getBannedSites(self, gridType=None, printOutput=False):
        """Retrieve the current list of banned and probing sites.

        Sites in state 'Banned' and sites in state 'Probing' are fetched
        from self.sitestatus, concatenated and sorted.

        Example usage:

          >>> print diracAdmin.getBannedSites()
          {'OK': True, 'Value': []}

        :param gridType: not used by this method; kept only so existing
                         callers that pass it keep working
        :param printOutput: when True, print the merged list one site per line
        :type printOutput: boolean
        :return: S_OK( sorted list of sites ), or the failing getSites()
                 result unchanged
        """
        mergedList = []
        for state in ('Banned', 'Probing'):
            sites = self.sitestatus.getSites(siteState=state)
            if not sites['OK']:
                return sites
            mergedList.extend(sites['Value'])

        mergedList.sort()
        if printOutput:
            print('\n'.join(mergedList))

        return S_OK(mergedList)

    #############################################################################
    def getSiteSection(self, site, printOutput=False):
        """Return the options of the CS section describing a DIRAC site.

        The grid type is the part of the site name before the first dot;
        the options are read from /Resources/Sites/<gridType>/<site>.

        Example usage:

          >>> print diracAdmin.getSiteSection('LCG.CERN.ch')
          {'OK': True, 'Value':}

        :param site: DIRAC site name, e.g. 'LCG.CERN.ch'
        :type site: string
        :param printOutput: when True, pretty-print the options on success
        :type printOutput: boolean
        :return: S_OK,S_ERROR
        """
        gridType = site.split('.')[0]

        gridSections = gConfig.getSections('/Resources/Sites/%s' % (gridType))
        if not gridSections['OK']:
            return S_ERROR('/Resources/Sites/%s is not a valid site section' %
                           (gridType))

        siteOptions = gConfig.getOptionsDict('/Resources/Sites/%s/%s' %
                                             (gridType, site))
        if printOutput and siteOptions['OK']:
            print(self.pPrint.pformat(siteOptions['Value']))
        return siteOptions

    #############################################################################
    def allowSite(self, site, comment, printOutput=False):
        """Set a site to 'Active', i.e. add it to the site mask.

        Nothing is changed when the site is already Active.  When
        self.rssFlag is set the status is changed through self.sitestatus,
        otherwise through the WMSAdministrator service.

        Example usage:

          >>> print diracAdmin.allowSite()
          {'OK': True, 'Value': }

        :param site: DIRAC site name
        :type site: string
        :param comment: comment stored together with the status change
        :param printOutput: when True, print what was done
        :type printOutput: boolean
        :return: S_OK,S_ERROR
        """
        validity = self.__checkSiteIsValid(site)
        if not validity['OK']:
            return validity

        activeMask = self.getSiteMask(status='Active')
        if not activeMask['OK']:
            return activeMask
        if site in activeMask['Value']:
            message = 'Site %s is already Active' % site
            if printOutput:
                print(message)
            return S_OK(message)

        if self.rssFlag:
            result = self.sitestatus.setSiteStatus(site, 'Active', comment)
        else:
            adminClient = RPCClient('WorkloadManagement/WMSAdministrator')
            result = adminClient.allowSite(site, comment)

        if result['OK'] and printOutput:
            print('Site %s status is set to Active' % site)

        return result

    #############################################################################
    def getSiteMaskLogging(self, site=None, printOutput=False):
        """Retrieve site mask logging information.

        Example usage:

          >>> print diracAdmin.getSiteMaskLogging('LCG.AUVER.fr')
          {'OK': True, 'Value': }

        :param site: DIRAC site name; when empty/None the request is made
                     without a site restriction
        :type site: string
        :param printOutput: when True, print the logging records
        :type printOutput: boolean
        :return: S_OK,S_ERROR -- the service result, or S_ERROR when the
                 requested site is unknown or missing from the answer
        """
        # Only validate a site that was actually given: validating the
        # default None always failed and made the "all sites" branch below
        # unreachable.
        # NOTE(review): confirm the service accepts an empty site argument.
        if site:
            result = self.__checkSiteIsValid(site)
            if not result['OK']:
                return result

        wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
        result = wmsAdmin.getSiteMaskLogging(site)
        if not result['OK']:
            return result

        siteDict = result['Value']
        if site and site not in siteDict:
            return S_ERROR('Site mask information not available for %s' %
                           (site))

        if printOutput:
            if site:
                print('\nSite Mask Logging Info for %s\n' % site)
            else:
                print('\nAll Site Mask Logging Info\n')

            # A separate loop variable keeps the 'site' argument intact, so
            # the per-site header is printed when no single site was asked for.
            for siteName, tupleList in siteDict.iteritems():
                if not site:
                    print('\n===> %s\n' % siteName)
                for tup in tupleList:
                    print(str(tup[0]).ljust(8) + str(tup[1]).ljust(20) +
                          '( ' + str(tup[2]) + ' )  "' + str(tup[3]) + '"')
                print(' ')
        return result

    #############################################################################
    def banSite(self, site, comment, printOutput=False):
        """Set a site to 'Banned', i.e. remove it from the site mask.

        Nothing is changed when the site is already Banned.  When
        self.rssFlag is set the status is changed through self.sitestatus,
        otherwise through the WMSAdministrator service.

        Example usage:

          >>> print diracAdmin.banSite()
          {'OK': True, 'Value': }

        :param site: DIRAC site name
        :type site: string
        :param comment: comment stored together with the status change
        :param printOutput: when True, print what was done
        :type printOutput: boolean
        :return: S_OK,S_ERROR
        """
        validity = self.__checkSiteIsValid(site)
        if not validity['OK']:
            return validity

        bannedMask = self.getSiteMask(status='Banned')
        if not bannedMask['OK']:
            return bannedMask
        if site in bannedMask['Value']:
            message = 'Site %s is already Banned' % site
            if printOutput:
                print(message)
            return S_OK(message)

        if self.rssFlag:
            result = self.sitestatus.setSiteStatus(site, 'Banned', comment)
        else:
            adminClient = RPCClient('WorkloadManagement/WMSAdministrator')
            result = adminClient.banSite(site, comment)

        if result['OK'] and printOutput:
            print('Site %s status is set to Banned' % site)

        return result

    #############################################################################
    def __checkSiteIsValid(self, site):
        """Internal check that a site name is one of the defined sites.

        The defined sites are the keys of the mapping returned by
        getSiteCEMapping().

        :param site: DIRAC site name
        :return: S_OK( message ) when the site is known, S_ERROR otherwise
        """
        mapping = getSiteCEMapping()
        if not mapping['OK']:
            return S_ERROR('Could not get site CE mapping')

        knownSites = mapping['Value'].keys()
        if site in knownSites:
            return S_OK('%s is valid' % site)

        return S_ERROR(
            'Specified site %s is not in list of defined sites' % site)

    #############################################################################
    def clearMask(self):
        """Remove all sites from the site mask.  Should be used with care.

        The request is forwarded to the WMSAdministrator service.

        Example usage:

          >>> print diracAdmin.clearMask()
          {'OK': True, 'Value':''}

        :return: S_OK,S_ERROR
        """
        return RPCClient('WorkloadManagement/WMSAdministrator').clearMask()

    #############################################################################
    def getServicePorts(self, setup='', printOutput=False):
        """Collect the configured service ports for a setup.

        When no setup is given it is read from /DIRAC/Setup.  For every
        system under /Systems that the setup defines, the Port option of
        each service is looked up; services without a port and systems
        missing from the setup are only reported with a warning.

        Example usage:

          >>> print diracAdmin.getServicePorts()
          {'OK': True, 'Value':''}

        :param setup: DIRAC setup name
        :type setup: string
        :param printOutput: when True, pretty-print the resulting dictionary
        :type printOutput: boolean
        :return: S_OK( { '<system>/<service>' : port } ) or S_ERROR
        """
        if not setup:
            setup = gConfig.getValue('/DIRAC/Setup', '')

        setupsResult = gConfig.getSections('/DIRAC/Setups', [])
        if not setupsResult['OK']:
            return S_ERROR('Could not get /DIRAC/Setups sections')
        knownSetups = setupsResult['Value']
        if setup not in knownSetups:
            return S_ERROR('Setup %s is not in allowed list: %s' %
                           (setup, ', '.join(knownSetups)))

        optionsResult = gConfig.getOptionsDict('/DIRAC/Setups/%s' % setup)
        if not optionsResult['OK']:
            return S_ERROR('Could not get /DIRAC/Setups/%s options' % setup)
        # Maps a system name to the name used in its /Systems path.
        systemInstances = optionsResult['Value']

        systemsResult = gConfig.getSections('/Systems')
        if not systemsResult['OK']:
            return S_ERROR('Could not get Systems sections')

        ports = {}
        for system in systemsResult['Value']:
            if system not in systemInstances:
                self.log.warn('%s is not defined in /DIRAC/Setups/%s' %
                              (system, setup))
                continue

            path = '/Systems/%s/%s/Services' % (system,
                                                systemInstances[system])
            sectionsResult = gConfig.getSections(path)
            if not sectionsResult['OK']:
                self.log.warn('Could not get sections in %s' % path)
                continue

            services = sectionsResult['Value'] or []
            self.log.verbose('System: %s ServicesList: %s' %
                             (system, ', '.join(services)))
            for service in services:
                spath = '%s/%s/Port' % (path, service)
                servicePort = gConfig.getValue(spath, 0)
                if not servicePort:
                    self.log.warn('No port found for %s' % spath)
                    continue
                self.log.verbose('Found port for %s/%s = %s' %
                                 (system, service, servicePort))
                ports['%s/%s' % (system, service)] = servicePort

        if printOutput:
            print(self.pPrint.pformat(ports))

        return S_OK(ports)

    #############################################################################
    def getProxy(self, userDN, userGroup, validity=43200, limited=False):
        """Download a proxy for the given user and group.

        Forwards to gProxyManager.downloadProxy; 'validity' is passed as
        requiredTimeLeft (the default 43200 corresponds to 12 hours when
        expressed in seconds).

        Example usage:

          >>> print diracAdmin.getProxy()
          {'OK': True, 'Value': }

        :param userDN: User DN
        :param userGroup: DIRAC Group
        :param validity: required time left of the proxy
        :param limited: passed through as the 'limited' flag
        :return: S_OK,S_ERROR
        """
        proxyResult = gProxyManager.downloadProxy(userDN,
                                                  userGroup,
                                                  requiredTimeLeft=validity,
                                                  limited=limited)
        return proxyResult

    #############################################################################
    def getVOMSProxy(self,
                     userDN,
                     userGroup,
                     vomsAttr=False,
                     validity=43200,
                     limited=False):
        """Download a proxy with VOMS extensions for the given user and group.

        Forwards to gProxyManager.downloadVOMSProxy; 'validity' is passed
        as requiredTimeLeft (the default 43200 corresponds to 12 hours when
        expressed in seconds) and 'vomsAttr' as requiredVOMSAttribute.

        Example usage:

          >>> print diracAdmin.getVOMSProxy()
          {'OK': True, 'Value': }

        :param userDN: User DN
        :param userGroup: DIRAC Group
        :param vomsAttr: required VOMS attribute
        :param validity: required time left of the proxy
        :param limited: passed through as the 'limited' flag
        :return: S_OK,S_ERROR
        """
        proxyResult = gProxyManager.downloadVOMSProxy(
            userDN,
            userGroup,
            requiredTimeLeft=validity,
            requiredVOMSAttribute=vomsAttr,
            limited=limited)
        return proxyResult

    #############################################################################
    def getPilotProxy(self, userDN, userGroup, validity=43200):
        """Get a pilot proxy for the given user and group.

        Forwards to gProxyManager.getPilotProxyFromDIRACGroup; 'validity'
        is passed as requiredTimeLeft (the default 43200 corresponds to
        12 hours when expressed in seconds).

        Example usage:

          >>> print diracAdmin.getPilotProxy()
          {'OK': True, 'Value': }

        :param userDN: User DN
        :param userGroup: DIRAC Group
        :param validity: required time left of the proxy
        :return: S_OK,S_ERROR
        """
        pilotProxy = gProxyManager.getPilotProxyFromDIRACGroup(
            userDN, userGroup, requiredTimeLeft=validity)
        return pilotProxy

    #############################################################################
    def resetJob(self, jobID):
        """Reset a job or list of jobs in the WMS.

        A string job ID, or every element of a list of job IDs, is first
        converted to an integer; any other type is sent to the JobManager
        service as it is.

        Example::

          >>> print dirac.reset(12345)
          {'OK': True, 'Value': [12345]}

        :param jobID: JobID
        :type jobID: integer, string or list of these
        :return: S_OK,S_ERROR
        """
        if isinstance(jobID, basestring):
            try:
                jobID = int(jobID)
            except Exception as error:
                return self._errorReport(
                    str(error),
                    'Expected integer or convertible integer for existing jobID'
                )
        elif isinstance(jobID, list):
            try:
                jobID = [int(singleID) for singleID in jobID]
            except Exception as error:
                return self._errorReport(
                    str(error),
                    'Expected integer or convertible integer for existing jobIDs'
                )

        managerClient = RPCClient('WorkloadManagement/JobManager',
                                  useCertificates=False)
        return managerClient.resetJob(jobID)

    #############################################################################
    def getJobPilotOutput(self, jobID, directory=''):
        """Retrieve the pilot output for an existing job in the WMS.

        The output is written to std.out / std.err inside a new
        directory <directory>/pilot_<jobID>; when no directory is given
        the directory the API was created in is used.  The method refuses
        to overwrite an existing output directory.

          >>> print dirac.getJobPilotOutput(12345)
          {'OK': True, StdOut:'',StdError:''}

        :param jobID: JobID
        :type jobID: integer or string
        :param directory: existing directory in which to create the output
        :type directory: string
        :return: S_OK,S_ERROR -- on success the service result is returned
        """
        if not directory:
            directory = self.currentDir

        if not os.path.exists(directory):
            return self._errorReport('Directory %s does not exist' % directory)

        wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
        result = wmsAdmin.getJobPilotOutput(jobID)
        if not result['OK']:
            return result

        outputPath = '%s/pilot_%s' % (directory, jobID)
        if os.path.exists(outputPath):
            self.log.info('Remove %s and retry to continue' % outputPath)
            return S_ERROR('Remove %s and retry to continue' % outputPath)

        # The path is known not to exist at this point, so create it directly.
        self.log.verbose('Creating directory %s' % outputPath)
        os.mkdir(outputPath)

        outputs = result['Value']
        if 'StdOut' in outputs:
            stdout = '%s/std.out' % (outputPath)
            with open(stdout, 'w') as fopen:
                fopen.write(outputs['StdOut'])
            self.log.verbose('Standard output written to %s' % (stdout))
        else:
            self.log.warn('No standard output returned')

        # NOTE(review): getPilotOutput reads the key 'StdErr' while this
        # method reads 'StdError' -- confirm which key the service returns.
        if 'StdError' in outputs:
            stderr = '%s/std.err' % (outputPath)
            with open(stderr, 'w') as fopen:
                fopen.write(outputs['StdError'])
            self.log.verbose('Standard error written to %s' % (stderr))
        else:
            self.log.warn('No standard error returned')

        self.log.always('Outputs retrieved in %s' % outputPath)
        return result

    #############################################################################
    def getPilotOutput(self, gridReference, directory=''):
        """Retrieve the pilot output (std.out and std.err) for a pilot reference.

        The output is written inside a new directory
        <directory>/pilot_<last path component of the reference>; when no
        directory is given the directory the API was created in is used.
        The method refuses to overwrite an existing output directory.

          >>> print dirac.getPilotOutput('https://some.host:9000/abcdef')
          {'OK': True, 'Value': {}}

        :param gridReference: Pilot Job Reference
        :type gridReference: string
        :param directory: existing directory in which to create the output
        :type directory: string
        :return: S_OK,S_ERROR -- on success the service result is returned
        """
        if not isinstance(gridReference, basestring):
            return self._errorReport('Expected string for pilot reference')

        if not directory:
            directory = self.currentDir

        if not os.path.exists(directory):
            return self._errorReport('Directory %s does not exist' % directory)

        wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
        result = wmsAdmin.getPilotOutput(gridReference)
        if not result['OK']:
            return result

        # Use the last path component of the reference as directory suffix;
        # fall back to a fixed name when the reference ends with '/'.
        gridReferenceSmall = gridReference.split('/')[-1]
        if not gridReferenceSmall:
            gridReferenceSmall = 'reference'
        outputPath = '%s/pilot_%s' % (directory, gridReferenceSmall)

        if os.path.exists(outputPath):
            self.log.info('Remove %s and retry to continue' % outputPath)
            return S_ERROR('Remove %s and retry to continue' % outputPath)

        # The path is known not to exist at this point, so create it directly.
        self.log.verbose('Creating directory %s' % outputPath)
        os.mkdir(outputPath)

        outputs = result['Value']
        if 'StdOut' in outputs:
            stdout = '%s/std.out' % (outputPath)
            with open(stdout, 'w') as fopen:
                fopen.write(outputs['StdOut'])
            self.log.info('Standard output written to %s' % (stdout))
        else:
            self.log.warn('No standard output returned')

        if 'StdErr' in outputs:
            stderr = '%s/std.err' % (outputPath)
            with open(stderr, 'w') as fopen:
                fopen.write(outputs['StdErr'])
            self.log.info('Standard error written to %s' % (stderr))
        else:
            self.log.warn('No standard error returned')

        self.log.always('Outputs retrieved in %s' % outputPath)
        return result

    #############################################################################
    def getPilotInfo(self, gridReference):
        """Retrieve the information the WMS holds about a pilot reference.

          >>> print dirac.getPilotInfo(12345)
          {'OK': True, 'Value': {}}

        :param gridReference: Pilot Job Reference
        :type gridReference: string
        :return: S_OK,S_ERROR
        """
        if isinstance(gridReference, basestring):
            adminClient = RPCClient('WorkloadManagement/WMSAdministrator')
            return adminClient.getPilotInfo(gridReference)

        return self._errorReport('Expected string for pilot reference')

    #############################################################################
    def killPilot(self, gridReference):
        """Ask the WMSAdministrator service to kill the given pilot.

          >>> print dirac.killPilot(12345)
          {'OK': True, 'Value': {}}

        :param gridReference: Pilot Job Reference
        :type gridReference: string
        :return: S_OK,S_ERROR
        """
        if isinstance(gridReference, basestring):
            adminClient = RPCClient('WorkloadManagement/WMSAdministrator')
            return adminClient.killPilot(gridReference)

        return self._errorReport('Expected string for pilot reference')

    #############################################################################
    def getPilotLoggingInfo(self, gridReference):
        """Retrieve the pilot logging info for a pilot reference.

          >>> print dirac.getPilotLoggingInfo(12345)
          {'OK': True, 'Value': {"The output of the command"}}

        :param gridReference: Grid pilot job reference Id
        :type gridReference: string
        :return: S_OK,S_ERROR
        """
        # Same check as getPilotInfo/killPilot: isinstance also accepts
        # subclasses of str/unicode, which an exact type() match rejected.
        if not isinstance(gridReference, basestring):
            return self._errorReport('Expected string for pilot reference')

        wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
        return wmsAdmin.getPilotLoggingInfo(gridReference)

    #############################################################################
    def getJobPilots(self, jobID):
        """Get the pilots known to the WMS for a given jobID.

        A string job ID is converted to an integer first.  On success the
        returned value is always pretty-printed to the screen.

          >>> print dirac.getJobPilots()
          {'OK': True, 'Value': {PilotID:{StatusDict}}}

        :param jobID: JobID
        :type jobID: integer or string
        :return: S_OK,S_ERROR
        """
        if isinstance(jobID, basestring):
            try:
                jobID = int(jobID)
            except Exception as error:
                return self._errorReport(
                    str(error),
                    'Expected integer or string for existing jobID')

        pilots = RPCClient('WorkloadManagement/WMSAdministrator').getPilots(
            jobID)
        if pilots['OK']:
            print(self.pPrint.pformat(pilots['Value']))
        return pilots

    #############################################################################
    def getPilotSummary(self, startDate='', endDate=''):
        """Retrieve the pilot summary from the WMS and print it as a table.

        One line is printed per CE, with its states in sorted order
        followed by their counts.  The full dictionary is returned too.

          >>> print dirac.getPilotSummary()
          {'OK': True, 'Value': {CE:{Status:Count}}}

        :param startDate: passed unchanged to the service
        :param endDate: passed unchanged to the service
        :return: S_OK,S_ERROR
        """
        adminClient = RPCClient('WorkloadManagement/WMSAdministrator')
        result = adminClient.getPilotSummary(startDate, endDate)
        if not result['OK']:
            return result

        ceDict = result['Value']

        # The CE with the most states decides how many Status/Count
        # column pairs the header line needs.
        widest = 0
        for summary in ceDict.itervalues():
            widest = max(widest, len(summary.keys()))
        columnPair = 'Status'.ljust(12) + 'Count'.ljust(12)
        print('CE'.ljust(28) + columnPair * widest)

        for ce, summary in ceDict.iteritems():
            cells = [ce.ljust(28)]
            for state in sorted(summary.keys()):
                cells.append(state.ljust(12) + str(summary[state]).ljust(12))
            print(''.join(cells))

        return result

    #############################################################################
    def selectRequests(self,
                       jobID=None,
                       requestID=None,
                       requestName=None,
                       requestType=None,
                       status=None,
                       operation=None,
                       ownerDN=None,
                       ownerGroup=None,
                       requestStart=0,
                       limit=100,
                       printOutput=False):
        """Select requests from the request management system.

        A few notes on the selection criteria:

          - jobID is the WMS JobID for the request (if applicable)
          - requestID is assigned during submission of the request
          - requestName is the corresponding XML file name
          - requestType e.g. 'transfer'
          - status e.g. Done
          - operation e.g. replicateAndRegister
          - requestStart e.g. the first request to consider (start from 0 by default)
          - limit e.g. selection limit (default 100)

        Only criteria with a true value are sent as conditions, each
        converted to a string.

        >>> dirac.selectRequests(jobID='4894')
        {'OK': True, 'Value': [[...]]}

        :param printOutput: when True, print the summary line, the parameter
                            names and at most 'limit' records
        :type printOutput: boolean
        :return: S_OK,S_ERROR -- the service result, or S_ERROR when the
                 service returned an empty value
        """
        options = {
            'RequestID': requestID,
            'RequestName': requestName,
            'JobID': jobID,
            'OwnerDN': ownerDN,
            'OwnerGroup': ownerGroup,
            'RequestType': requestType,
            'Status': status,
            'Operation': operation
        }

        conditions = {}
        for key, value in options.iteritems():
            if value:
                try:
                    conditions[key] = str(value)
                except Exception as x:
                    return self._errorReport(
                        str(x), 'Expected string for %s field' % key)

        # Convert separately so the error names the field that failed
        # (the old message interpolated the value of 'limit' instead).
        try:
            requestStart = int(requestStart)
        except Exception as x:
            return self._errorReport(
                str(x), 'Expected integer for requestStart field')
        try:
            limit = int(limit)
        except Exception as x:
            return self._errorReport(str(x),
                                     'Expected integer for limit field')

        self.log.verbose('Will select requests with the following conditions')
        self.log.verbose(self.pPrint.pformat(conditions))
        requestClient = RPCClient("RequestManagement/centralURL")
        result = requestClient.getRequestSummaryWeb(conditions, [],
                                                    requestStart, limit)
        if not result['OK']:
            self.log.warn(result['Message'])
            return result

        requestIDs = result['Value']
        # Check for an empty answer before indexing into it; previously
        # this test came after requestIDs['Records'] had already been used.
        if not requestIDs:
            return S_ERROR('No requests selected for conditions: %s' %
                           conditions)

        records = requestIDs['Records']
        conds = ['%s = %s' % (key, value)
                 for key, value in conditions.iteritems() if value]
        summary = '%s request(s) selected with conditions %s and limit %s' % (
            len(records), ', '.join(conds), limit)
        self.log.verbose(summary)

        if printOutput:
            print(summary)
            print(requestIDs['ParameterNames'])
            # Slicing already copes with fewer than 'limit' records.
            for request in records[:limit]:
                print(request)

        return result

    #############################################################################
    def getRequestSummary(self, printOutput=False):
        """Get a summary of the requests in the request DB.

        :param printOutput: when True, pretty-print the summary on success
        :type printOutput: boolean
        :return: S_OK,S_ERROR -- the getDBSummary() result of the service
        """
        summary = RPCClient("RequestManagement/centralURL",
                            timeout=120).getDBSummary()
        if summary['OK']:
            if printOutput:
                print(self.pPrint.pformat(summary['Value']))
        else:
            self.log.warn(summary['Message'])

        return summary

    #############################################################################
    def getExternalPackageVersions(self):
        """Log the versions of external packages of the local installation.

        Logs the DIRAC version, then tries to import lcg_util and gfalthr
        (falling back to gfal) and logs where each was loaded from and its
        version.  Import failures are logged, never raised.  Finally the
        default storage protocols from the configuration are logged.

        :return: S_OK() always
        """
        gLogger.info(
            'DIRAC version v%dr%d build %d' %
            (DIRAC.majorVersion, DIRAC.minorVersion, DIRAC.patchLevel))

        try:
            import lcg_util
            gLogger.info('Using lcg_util from: \n%s' % lcg_util.__file__)
            gLogger.info("The version of lcg_utils is %s" %
                         lcg_util.lcg_util_version())
        except Exception as lcgError:
            gLogger.exception(
                "SRM2Storage.__init__: Failed to import lcg_util: %s" %
                (lcgError))

        try:
            import gfalthr as gfal
            gLogger.info("Using gfalthr from: \n%s" % gfal.__file__)
            gLogger.info("The version of gfalthr is %s" % gfal.gfal_version())
        except Exception as gfalthrError:
            gLogger.warn(
                "SRM2Storage.__init__: Failed to import gfalthr: %s." %
                (gfalthrError))
            # Threaded flavour unavailable: try the plain gfal module.
            try:
                import gfal
                gLogger.info("Using gfal from: %s" % gfal.__file__)
                gLogger.info("The version of gfal is %s" %
                             gfal.gfal_version())
            except Exception as gfalError:
                gLogger.exception(
                    "SRM2Storage.__init__: Failed to import gfal: %s" %
                    (gfalError))

        protocols = gConfig.getValue(
            '/Resources/StorageElements/DefaultProtocols', [])
        gLogger.info('Default list of protocols are: %s' %
                     (', '.join(protocols)))
        return S_OK()

    #############################################################################
    def getSiteProtocols(self, site, printOutput=False):
        """Check the protocols defined for the SEs of a site.

        The SEs are read from /Resources/Sites/<gridType>/<site>/SE.  For
        every SE, each sub-section whose ProtocolName is 'SRM2' provides
        the ProtocolsList; when that list is empty the default protocols
        from /Resources/StorageElements/DefaultProtocols are used instead.

        :param site: DIRAC site name
        :type site: string
        :param printOutput: when True, print a table of SEs and protocols
        :type printOutput: boolean
        :return: S_OK( { SE : list of protocols } ) or S_ERROR
        """
        result = self.__checkSiteIsValid(site)
        if not result['OK']:
            return result

        siteSection = '/Resources/Sites/%s/%s/SE' % (site.split('.')[0], site)
        siteSEs = gConfig.getValue(siteSection, [])
        if not siteSEs:
            return S_ERROR('No SEs found for site %s in section %s' %
                           (site, siteSection))

        defaultProtocols = gConfig.getValue(
            '/Resources/StorageElements/DefaultProtocols', [])
        # The comma matters: without it the two literals were concatenated
        # and the message text became the join separator.
        self.log.verbose('Default list of protocols are',
                         ', '.join(defaultProtocols))

        seInfo = {}
        siteSEs.sort()
        for se in siteSEs:
            sections = gConfig.getSections('/Resources/StorageElements/%s/' %
                                           (se))
            if not sections['OK']:
                return sections
            for section in sections['Value']:
                protocolName = gConfig.getValue(
                    '/Resources/StorageElements/%s/%s/ProtocolName' %
                    (se, section), '')
                if protocolName != 'SRM2':
                    continue
                path = '/Resources/StorageElements/%s/%s/ProtocolsList' % (
                    se, section)
                seProtocols = gConfig.getValue(path, [])
                if not seProtocols:
                    seProtocols = defaultProtocols
                seInfo[se] = seProtocols

        if printOutput:
            print('\nSummary of protocols for StorageElements at site %s' %
                  site)
            # Keep the newline outside ljust() so the header column has the
            # same width (30) as the rows below.
            print('\n' + 'StorageElement'.ljust(30) +
                  'ProtocolsList'.ljust(30) + '\n')
            for se, protocols in seInfo.iteritems():
                print(se.ljust(30) + ', '.join(protocols).ljust(30))

        return S_OK(seInfo)

    #############################################################################
    def setSiteProtocols(self, site, protocolsList, printOutput=False):
        """Write a protocol list to every SRM2 storage element of a site.

        Every requested protocol must appear in
        ``/Resources/StorageElements/DefaultProtocols``. The user is prompted
        for confirmation before anything is changed; the CS is committed only
        if at least one option was set.

        :param site: DIRAC site name
        :type site: string
        :param protocolsList: protocols to set for each SE
        :type protocolsList: list of strings
        :param printOutput: report the outcome on stdout when True
        :type printOutput: boolean
        :return: S_OK() on success or when the user declines, otherwise the
                 failing result / S_ERROR
        """
        validity = self.__checkSiteIsValid(site)
        if not validity['OK']:
            return validity

        siteSection = '/Resources/Sites/%s/%s/SE' % (site.split('.')[0], site)
        siteSEs = gConfig.getValue(siteSection, [])
        if not siteSEs:
            return S_ERROR('No SEs found for site %s in section %s' %
                           (site, siteSection))

        defaultProtocols = gConfig.getValue(
            '/Resources/StorageElements/DefaultProtocols', [])
        self.log.verbose('Default list of protocols are',
                         ', '.join(defaultProtocols))

        # Refuse the whole request as soon as one protocol is not a default.
        for protocol in protocolsList:
            if protocol not in defaultProtocols:
                return S_ERROR(
                    'Requested to set protocol %s in list but %s is not '
                    'in default list of protocols:\n%s' %
                    (protocol, protocol, ', '.join(defaultProtocols)))

        requested = ', '.join(protocolsList)
        answer = promptUser(
            'Do you want to add the following default protocols:'
            ' %s for SE(s):\n%s' % (requested, ', '.join(siteSEs)))
        if not answer['OK']:
            return answer
        if answer['Value'].lower() != 'y':
            self.log.always('No protocols will be added')
            return S_OK()

        modifiedCS = False
        for se in siteSEs:
            seSections = gConfig.getSections(
                '/Resources/StorageElements/%s/' % (se))
            if not seSections['OK']:
                return seSections
            for section in seSections['Value']:
                protocolName = gConfig.getValue(
                    '/Resources/StorageElements/%s/%s/ProtocolName' %
                    (se, section), '')
                if protocolName != 'SRM2':
                    continue
                path = '/Resources/StorageElements/%s/%s/ProtocolsList' % (
                    se, section)
                self.log.verbose('Setting %s to %s' % (path, requested))
                setResult = self.csSetOption(path, requested)
                if not setResult['OK']:
                    return setResult
                modifiedCS = True

        if not modifiedCS:
            if printOutput:
                print('No modifications to CS required')
            return S_OK()

        commit = self.csCommitChanges(False)
        if not commit['OK']:
            return S_ERROR('CS Commit failed with message = %s' %
                           (commit['Message']))
        if printOutput:
            print('Successfully committed changes to CS')
        return S_OK()

    #############################################################################
    def csSetOption(self, optionPath, optionValue):
        """Set a CS option by delegating to ``self.csAPI.setOption``.

        :param optionPath: path of the option
        :param optionValue: value to store
        :return: whatever ``csAPI.setOption`` returns
        """
        outcome = self.csAPI.setOption(optionPath, optionValue)
        return outcome

    #############################################################################
    def csSetOptionComment(self, optionPath, comment):
        """Set the comment of a CS option via ``self.csAPI.setOptionComment``.

        :param optionPath: path of the option
        :param comment: comment text to attach
        :return: whatever ``csAPI.setOptionComment`` returns
        """
        outcome = self.csAPI.setOptionComment(optionPath, comment)
        return outcome

    #############################################################################
    def csModifyValue(self, optionPath, newValue):
        """Modify an existing CS value via ``self.csAPI.modifyValue``.

        :param optionPath: path of the option to change
        :param newValue: replacement value
        :return: whatever ``csAPI.modifyValue`` returns
        """
        outcome = self.csAPI.modifyValue(optionPath, newValue)
        return outcome

    #############################################################################
    def csRegisterUser(self, username, properties):
        """Register a user in the CS via ``self.csAPI.addUser``.

        :param username: name of the user
        :param properties: dict containing:

            - DN
            - groups : list/tuple of groups the user belongs to
            - <others> : more properties of the user, like mail
        :return: whatever ``csAPI.addUser`` returns
        """
        outcome = self.csAPI.addUser(username, properties)
        return outcome

    #############################################################################
    def csDeleteUser(self, user):
        """Delete one user, or a list of users, via ``self.csAPI.deleteUsers``.

        :param user: user name or list of user names
        :return: whatever ``csAPI.deleteUsers`` returns
        """
        outcome = self.csAPI.deleteUsers(user)
        return outcome

    #############################################################################
    def csModifyUser(self, username, properties, createIfNonExistant=False):
        """Modify a CS user via ``self.csAPI.modifyUser``.

        :param username: name of the user
        :param properties: same dict as accepted when adding a user
        :param createIfNonExistant: forwarded unchanged to the CS API
        :return: whatever ``csAPI.modifyUser`` returns
        """
        outcome = self.csAPI.modifyUser(username, properties,
                                        createIfNonExistant)
        return outcome

    #############################################################################
    def csListUsers(self, group=False):
        """List CS users via ``self.csAPI.listUsers``.

        :param group: group to restrict the listing to; the default ``False``
                      is forwarded as-is (all users, per the original doc)
        :return: whatever ``csAPI.listUsers`` returns
        """
        outcome = self.csAPI.listUsers(group)
        return outcome

    #############################################################################
    def csDescribeUsers(self, mask=False):
        """Describe CS users and their properties via ``csAPI.describeUsers``.

        :param mask: when given, only users in the mask are returned (per the
                     original doc); the default ``False`` is forwarded as-is
        :return: whatever ``csAPI.describeUsers`` returns
        """
        outcome = self.csAPI.describeUsers(mask)
        return outcome

    #############################################################################
    def csModifyGroup(self, groupname, properties, createIfNonExistant=False):
        """Modify a CS group via ``self.csAPI.modifyGroup``.

        :param groupname: name of the group
        :param properties: same dict as accepted when adding a group
        :param createIfNonExistant: forwarded unchanged to the CS API
        :return: whatever ``csAPI.modifyGroup`` returns
        """
        outcome = self.csAPI.modifyGroup(groupname, properties,
                                         createIfNonExistant)
        return outcome

    #############################################################################
    def csListHosts(self):
        """List the hosts in the CS via ``self.csAPI.listHosts``.

        :return: whatever ``csAPI.listHosts`` returns
        """
        outcome = self.csAPI.listHosts()
        return outcome

    #############################################################################
    def csDescribeHosts(self, mask=False):
        """Get extended host information via ``self.csAPI.describeHosts``.

        :param mask: forwarded unchanged to the CS API (default ``False``)
        :return: whatever ``csAPI.describeHosts`` returns
        """
        outcome = self.csAPI.describeHosts(mask)
        return outcome

    #############################################################################
    def csModifyHost(self, hostname, properties, createIfNonExistant=False):
        """Modify a CS host via ``self.csAPI.modifyHost``.

        :param hostname: name of the host
        :param properties: same dict as accepted when adding a host
        :param createIfNonExistant: forwarded unchanged to the CS API
        :return: whatever ``csAPI.modifyHost`` returns
        """
        outcome = self.csAPI.modifyHost(hostname, properties,
                                        createIfNonExistant)
        return outcome

    #############################################################################
    def csListGroups(self):
        """List the groups in the CS via ``self.csAPI.listGroups``.

        :return: whatever ``csAPI.listGroups`` returns
        """
        outcome = self.csAPI.listGroups()
        return outcome

    #############################################################################
    def csDescribeGroups(self, mask=False):
        """Describe CS groups and their properties via ``csAPI.describeGroups``.

        :param mask: when given, only groups in the mask are returned (per the
                     original doc); the default ``False`` is forwarded as-is
        :return: whatever ``csAPI.describeGroups`` returns
        """
        outcome = self.csAPI.describeGroups(mask)
        return outcome

    #############################################################################
    def csSyncUsersWithCFG(self, usersCFG):
        """Synchronize CS users with a cfg via ``csAPI.syncUsersWithCFG``.

        :param usersCFG: cfg object handed unchanged to the CS API
        :return: whatever ``csAPI.syncUsersWithCFG`` returns
        """
        outcome = self.csAPI.syncUsersWithCFG(usersCFG)
        return outcome

    #############################################################################
    def csCommitChanges(self, sortUsers=True):
        """Commit the pending changes to the CS.

        :param sortUsers: forwarded to ``csAPI.commitChanges``. It used to be
                          ignored (``False`` was hard-coded), so callers who
                          want the old behaviour must now pass ``False``.
        :type sortUsers: boolean
        :return: whatever ``csAPI.commitChanges`` returns
        """
        return self.csAPI.commitChanges(sortUsers=sortUsers)

    #############################################################################
    def sendMail(self,
                 address,
                 subject,
                 body,
                 fromAddress=None,
                 localAttempt=True,
                 html=False):
        """Send a mail through a freshly created NotificationClient.

        All arguments are passed positionally, in this order, to
        ``NotificationClient.sendMail``.

        :param address: destination address
        :param subject: mail subject
        :param body: mail body
        :param fromAddress: sender address, ``None`` by default
        :param localAttempt: forwarded unchanged to the client
        :param html: forwarded unchanged to the client
        :return: whatever ``NotificationClient.sendMail`` returns
        """
        mailArgs = (address, subject, body, fromAddress, localAttempt, html)
        return NotificationClient().sendMail(*mailArgs)

    #############################################################################
    def sendSMS(self, userName, body, fromAddress=None):
        """Send an SMS to a user through a freshly created NotificationClient.

        :param userName: recipient user name
        :param body: message text, at most 160 characters
        :param fromAddress: sender address, ``None`` by default
        :return: S_ERROR when the body is longer than 160 characters,
                 otherwise whatever ``NotificationClient.sendSMS`` returns
        """
        maxLength = 160
        if len(body) <= maxLength:
            return NotificationClient().sendSMS(userName, body, fromAddress)
        return S_ERROR('Exceeded maximum SMS length of 160 characters')

    #############################################################################
    def getBDIISite(self, site, host=None):
        """Query the BDII for a site by calling ``ldapSite``.

        :param site: site to look up
        :param host: BDII host, forwarded as the ``host`` keyword
        :return: whatever ``ldapSite`` returns
        """
        siteInfo = ldapSite(site, host=host)
        return siteInfo

    #############################################################################
    def getBDIICluster(self, ce, host=None):
        """Query the BDII for a CE's cluster by calling ``ldapCluster``.

        :param ce: computing element to look up
        :param host: BDII host, forwarded as the ``host`` keyword
        :return: whatever ``ldapCluster`` returns
        """
        clusterInfo = ldapCluster(ce, host=host)
        return clusterInfo

    #############################################################################
    def getBDIICE(self, ce, host=None):
        """Query the BDII for a CE by calling ``ldapCE``.

        :param ce: computing element to look up
        :param host: BDII host, forwarded as the ``host`` keyword
        :return: whatever ``ldapCE`` returns
        """
        ceInfo = ldapCE(ce, host=host)
        return ceInfo

    #############################################################################
    def getBDIIService(self, ce, host=None):
        """Query the BDII for service information by calling ``ldapService``.

        :param ce: identifier handed to ``ldapService``
        :param host: BDII host, forwarded as the ``host`` keyword
        :return: whatever ``ldapService`` returns
        """
        serviceInfo = ldapService(ce, host=host)
        return serviceInfo

    #############################################################################
    def getBDIICEState(self, ce, useVO=voName, host=None):
        """Query the BDII for a CE's state by calling ``ldapCEState``.

        :param ce: computing element to look up
        :param useVO: VO passed to the query; defaults to the module-level
                      ``voName``
        :param host: BDII host, forwarded as the ``host`` keyword
        :return: whatever ``ldapCEState`` returns
        """
        stateInfo = ldapCEState(ce, useVO, host=host)
        return stateInfo

    #############################################################################
    def getBDIICEVOView(self, ce, useVO=voName, host=None):
        """Query the BDII for a CE's VO view by calling ``ldapCEVOView``.

        :param ce: computing element to look up
        :param useVO: VO passed to the query; defaults to the module-level
                      ``voName``
        :param host: BDII host, forwarded as the ``host`` keyword
        :return: whatever ``ldapCEVOView`` returns
        """
        voViewInfo = ldapCEVOView(ce, useVO, host=host)
        return voViewInfo

    #############################################################################
    def getBDIISE(self, site, useVO=voName, host=None):
        """Query the BDII for storage information by calling ``ldapSE``.

        :param site: site to look up
        :param useVO: VO passed to the query; defaults to the module-level
                      ``voName``
        :param host: BDII host, forwarded as the ``host`` keyword
        :return: whatever ``ldapSE`` returns
        """
        seInfo = ldapSE(site, useVO, host=host)
        return seInfo
示例#2
0
class SiteInspectorAgent(AgentModule):
    """ SiteInspectorAgent

    The SiteInspectorAgent gets all the site names together with their
    current status and has a PEP evaluate each of them, using a pool of
    worker threads fed from a shared queue.
    """

    # Max number of worker threads by default
    __maxNumberOfThreads = 15

    # Inspection freqs, defaults, the lower, the higher priority to be checked.
    # Error state usually means there is a glitch somewhere, so it has the highest
    # priority.
    __checkingFreqs = {
        'Active': 20,
        'Degraded': 20,
        'Probing': 20,
        'Banned': 15,
        'Unknown': 10,
        'Error': 5
    }

    def __init__(self, *args, **kwargs):
        """Create the agent; the real set-up happens in :meth:`initialize`."""

        AgentModule.__init__(self, *args, **kwargs)

        # Queue of site descriptions for the current cycle (set by execute)
        self.sitesToBeChecked = None
        # Worker pool and SiteStatus client, both created in initialize
        self.threadPool = None
        self.siteClient = None
        # Clients handed to every PEP instance
        self.clients = {}

    def initialize(self):
        """Create the thread pool and the clients shared with the PEP.

        The pool size comes from the ``maxNumberOfThreads`` agent option and
        falls back to the class default.

        :return: S_OK()
        """

        maxNumberOfThreads = self.am_getOption('maxNumberOfThreads',
                                               self.__maxNumberOfThreads)
        self.threadPool = ThreadPool(maxNumberOfThreads, maxNumberOfThreads)

        self.siteClient = SiteStatus()

        self.clients['SiteStatus'] = self.siteClient
        self.clients['ResourceManagementClient'] = ResourceManagementClient()

        return S_OK()

    def execute(self):
        """Run one agent cycle.

        Gets the sites to check, works out how many worker jobs to queue and
        queues them. Each job takes sites from the queue until it is empty.
        The method then joins the queue, so the cycle does not end before
        every site has been acknowledged with ``task_done``.

        :return: S_OK(), or the failing result of getSitesToBeChecked
        """

        # Gets sites to be checked ( returns a Queue )
        sitesToBeChecked = self.getSitesToBeChecked()
        if not sitesToBeChecked['OK']:
            self.log.error(sitesToBeChecked['Message'])
            return sitesToBeChecked
        self.sitesToBeChecked = sitesToBeChecked['Value']

        queueSize = self.sitesToBeChecked.qsize()
        pollingTime = self.am_getPollingTime()

        # Assigns number of threads on the fly such that we exhaust the PollingTime
        # without having to spawn too many threads. We assume 10 seconds per element
        # to be processed ( actually, it takes something like 1 sec per element ):
        # numberOfThreads = elements * 10(s/element) / pollingTime
        numberOfThreads = int(math.ceil(queueSize * 10. / pollingTime))

        self.log.info('Needed %d threads to process %d elements' %
                      (numberOfThreads, queueSize))

        for _x in range(numberOfThreads):
            jobUp = self.threadPool.generateJobAndQueueIt(self._execute)
            if not jobUp['OK']:
                self.log.error(jobUp['Message'])

        self.log.info('blocking until all sites have been processed')
        # block until all tasks are done
        self.sitesToBeChecked.join()
        self.log.info('done')

        return S_OK()

    def getSitesToBeChecked(self):
        """Build the queue of sites to be evaluated in this cycle.

        Asks the SiteStatus client for all sites and for their statuses, then
        puts one description dict per site on a new queue.

        :return: S_OK( Queue ) or the failing SiteStatus result
        """

        toBeChecked = Queue.Queue()

        res = self.siteClient.getSites('All')
        if not res['OK']:
            return res

        # get the current status
        res = self.siteClient.getSiteStatuses(res['Value'])
        if not res['OK']:
            return res

        # one queue entry per site, carrying the status just retrieved
        for site, status in res['Value'].items():
            toBeChecked.put({
                'status': status,
                'name': site,
                'site': site,
                'element': 'Site',
                'statusType': 'all',
                'elementType': 'Site'
            })

        return S_OK(toBeChecked)

    # Private methods ............................................................

    def _execute(self):
        """Worker body run by each job queued on the thread pool.

        Loops until the queue is empty. On each iteration it takes one site
        and lets the PEP enforce the policies for it. Every site taken from
        the queue is acknowledged with ``task_done``, even when enforcement
        fails or raises; otherwise the ``join`` in :meth:`execute` would
        never return.

        :return: S_OK() once the queue is empty
        """

        pep = PEP(clients=self.clients)

        while True:

            try:
                site = self.sitesToBeChecked.get_nowait()
            except Queue.Empty:
                return S_OK()

            try:
                resEnforce = pep.enforce(site)
                if not resEnforce['OK']:
                    self.log.error('Failed policy enforcement',
                                   resEnforce['Message'])
            except Exception:
                # Keep the worker alive so the remaining sites still get
                # processed; the failure is logged with its traceback.
                self.log.exception('Exception during policy enforcement')
            finally:
                # Used together with join !
                self.sitesToBeChecked.task_done()
示例#3
0
class DiracAdmin(API):
    """ Administrative functionalities
  """

    #############################################################################
    def __init__(self):
        """Initialize the DIRAC Admin API.

        Sets up the CS API client, the debug flag, the scratch and current
        directories, the RSS flag, the SiteStatus client and the set of
        known site names.
        """
        super(DiracAdmin, self).__init__()

        self.csAPI = CSAPI()

        # Debug mode is on when the configured LogLevel is DEBUG, which is
        # also the fallback when the option is missing.
        logLevel = gConfig.getValue(self.section + '/LogLevel', 'DEBUG')
        self.dbg = logLevel == 'DEBUG'

        scratchOption = self.section + '/ScratchDir'
        self.scratchDir = gConfig.getValue(scratchOption, '/tmp')
        self.currentDir = os.getcwd()
        self.rssFlag = ResourceStatus().rssFlag
        self.sitestatus = SiteStatus()
        # An unsuccessful getSites() result has no 'Value': use an empty set.
        knownSites = getSites()
        self._siteSet = set(knownSites.get('Value', []))

    #############################################################################
    def uploadProxy(self):
        """Upload a proxy by calling ``gProxyManager.uploadProxy()``.

        Example usage:

          >>> gLogger.notice(diracAdmin.uploadProxy())

        :return: S_OK,S_ERROR
        """
        uploadResult = gProxyManager.uploadProxy()
        return uploadResult

    #############################################################################
    def setProxyPersistency(self, userDN, userGroup, persistent=True):
        """Set the persistence of a proxy in the Proxy Manager.

        Example usage:

          >>> gLogger.notice(diracAdmin.setProxyPersistency( 'some DN', 'dirac group', True ))
          {'OK': True }

        :param userDN: User DN
        :type userDN: string
        :param userGroup: DIRAC Group
        :type userGroup: string
        :param persistent: Persistent flag
        :type persistent: boolean
        :return: S_OK,S_ERROR
        """
        outcome = gProxyManager.setPersistency(userDN, userGroup, persistent)
        return outcome

    #############################################################################
    def checkProxyUploaded(self, userDN, userGroup, requiredTime):
        """Ask the Proxy Manager whether a user has a proxy uploaded.

        Delegates to ``gProxyManager.userHasProxy``.

        Example usage:

          >>> gLogger.notice(diracAdmin.checkProxyUploaded( 'some DN', 'dirac group', 0 ))
          {'OK': True, 'Value' : True/False }

        :param userDN: User DN
        :type userDN: string
        :param userGroup: DIRAC Group
        :type userGroup: string
        :param requiredTime: Required life time of the uploaded proxy
                             (presumably in seconds - TODO confirm)
        :return: S_OK,S_ERROR
        """
        outcome = gProxyManager.userHasProxy(userDN, userGroup, requiredTime)
        return outcome

    #############################################################################
    def getSiteMask(self, printOutput=False, status='Active'):
        """Retrieve the sites that are in a given state.

        Example usage:

          >>> gLogger.notice(diracAdmin.getSiteMask())
          {'OK': True, 'Value': [...]}

        :param printOutput: log every site name (sorted) when True
        :type printOutput: boolean
        :param status: site state handed to ``sitestatus.getSites``
        :type status: string
        :return: the result of ``sitestatus.getSites``, unchanged apart from
                 the in-place sort applied when printing
        """
        result = self.sitestatus.getSites(siteState=status)
        if not result['OK']:
            return result

        siteNames = result['Value']
        if printOutput:
            # In-place sort: the list inside the returned result is sorted too.
            siteNames.sort()
            for siteName in siteNames:
                gLogger.notice(siteName)

        return result

    #############################################################################
    def getBannedSites(self, printOutput=False):
        """Retrieve the sorted list of banned and probing sites.

        Example usage:

          >>> gLogger.notice(diracAdmin.getBannedSites())
          {'OK': True, 'Value': []}

        :param printOutput: log the merged list, one site per line, when True
        :type printOutput: boolean
        :return: S_OK( sorted list of sites ) or the first failing
                 ``sitestatus.getSites`` result
        """
        mergedList = []
        # 'Banned' is queried first; a failure returns before the next query.
        for state in ('Banned', 'Probing'):
            stateSites = self.sitestatus.getSites(siteState=state)
            if not stateSites['OK']:
                return stateSites
            mergedList = mergedList + stateSites['Value']
        mergedList = sorted(mergedList)

        if printOutput:
            gLogger.notice('\n'.join(mergedList))

        return S_OK(mergedList)

    #############################################################################
    def getSiteSection(self, site, printOutput=False):
        """Get the options of a site's section in ``/Resources/Sites``.

        Example usage:

          >>> gLogger.notice(diracAdmin.getSiteSection('LCG.CERN.ch'))
          {'OK': True, 'Value':}

        :param site: DIRAC site name; the part before the first dot selects
                     the grid section
        :type site: string
        :param printOutput: pretty-print the options when True
        :type printOutput: boolean
        :return: result of ``gConfig.getOptionsDict`` for the site section,
                 or S_ERROR when the grid section cannot be listed
        """
        gridType = site.split('.')[0]
        gridSection = '/Resources/Sites/%s' % (gridType)
        gridSections = gConfig.getSections(gridSection)
        if not gridSections['OK']:
            return S_ERROR('%s is not a valid site section' % gridSection)

        siteOptions = gConfig.getOptionsDict('%s/%s' % (gridSection, site))
        if printOutput and siteOptions['OK']:
            gLogger.notice(self.pPrint.pformat(siteOptions['Value']))
        return siteOptions

    #############################################################################
    def allowSite(self, site, comment, printOutput=False):
        """Set a site to Active, i.e. add it to the site mask.

        Example usage:

          >>> gLogger.notice(diracAdmin.allowSite('LCG.CERN.ch', 'reason'))
          {'OK': True, 'Value': }

        :param site: DIRAC site name
        :type site: string
        :param comment: comment stored with the status change
        :type comment: string
        :param printOutput: log the outcome when True
        :type printOutput: boolean
        :return: S_OK,S_ERROR
        """
        validity = self.__checkSiteIsValid(site)
        if not validity['OK']:
            return validity

        activeMask = self.getSiteMask(status='Active')
        if not activeMask['OK']:
            return activeMask
        if site in activeMask['Value']:
            alreadyMsg = 'Site %s is already Active' % site
            if printOutput:
                gLogger.notice(alreadyMsg)
            return S_OK(alreadyMsg)

        # The RSS flag selects the backend that performs the change.
        if self.rssFlag:
            outcome = self.sitestatus.setSiteStatus(site, 'Active', comment)
        else:
            outcome = WMSAdministratorClient().allowSite(site, comment)

        if outcome['OK'] and printOutput:
            gLogger.notice('Site %s status is set to Active' % site)
        return outcome

    #############################################################################
    def getSiteMaskLogging(self, site=None, printOutput=False):
        """Retrieve, and optionally print, site mask logging information.

        Example usage:

          >>> gLogger.notice(diracAdmin.getSiteMaskLogging('LCG.AUVER.fr'))
          {'OK': True, 'Value': }

        :param site: DIRAC site name. ``None`` (the default) skips the
                     validity check and passes ``None`` on to the backend;
                     the printed title then reads "All Site Mask Logging Info"
        :type site: string
        :param printOutput: log the retrieved records when True
        :type printOutput: boolean
        :return: S_OK() on success (the records are only printed, not
                 returned), otherwise the failing result
        """
        # Validate only an explicit site: None would never pass the check,
        # which made the all-sites branch below unreachable.
        if site is not None:
            result = self.__checkSiteIsValid(site)
            if not result['OK']:
                return result

        if self.rssFlag:
            result = ResourceStatusClient().selectStatusElement('Site',
                                                                'History',
                                                                name=site)
        else:
            result = WMSAdministratorClient().getSiteMaskLogging(site)

        if not result['OK']:
            return result

        if printOutput:
            if site:
                gLogger.notice('\nSite Mask Logging Info for %s\n' % site)
            else:
                gLogger.notice('\nAll Site Mask Logging Info\n')

            sitesLogging = result['Value']
            if isinstance(sitesLogging, dict):
                for siteName, tupleList in sitesLogging.items():
                    # Per-site header only when no single site was requested;
                    # otherwise the title above already names the site.
                    if not site:
                        gLogger.notice('\n===> %s\n' % siteName)
                    for tup in tupleList:
                        stup = str(tup[0]).ljust(8) + str(tup[1]).ljust(20)
                        stup += '( ' + str(tup[2]).ljust(len(str(
                            tup[2]))) + ' )  "' + str(tup[3]) + '"'
                        gLogger.notice(stup)
                    gLogger.notice(' ')
            elif isinstance(sitesLogging, list):
                # Only fields 1, 3 and 4 of each record are shown.
                for sl in sitesLogging:
                    gLogger.notice((sl[1], sl[3], sl[4]))

        return S_OK()

    #############################################################################
    def banSite(self, site, comment, printOutput=False):
        """Set a site to Banned, i.e. remove it from the site mask.

        Example usage:

          >>> gLogger.notice(diracAdmin.banSite('LCG.CERN.ch', 'reason'))
          {'OK': True, 'Value': }

        :param site: DIRAC site name
        :type site: string
        :param comment: comment stored with the status change
        :type comment: string
        :param printOutput: log the outcome when True
        :type printOutput: boolean
        :return: S_OK,S_ERROR
        """
        validity = self.__checkSiteIsValid(site)
        if not validity['OK']:
            return validity

        bannedMask = self.getSiteMask(status='Banned')
        if not bannedMask['OK']:
            return bannedMask
        if site in bannedMask['Value']:
            alreadyMsg = 'Site %s is already Banned' % site
            if printOutput:
                gLogger.notice(alreadyMsg)
            return S_OK(alreadyMsg)

        # The RSS flag selects the backend that performs the change.
        if self.rssFlag:
            outcome = self.sitestatus.setSiteStatus(site, 'Banned', comment)
        else:
            outcome = WMSAdministratorClient().banSite(site, comment)

        if outcome['OK'] and printOutput:
            gLogger.notice('Site %s status is set to Banned' % site)
        return outcome

    #############################################################################
    def __checkSiteIsValid(self, site):
        """Check that a site name, or a collection of them, is known.

        :param site: one site name, or a list/set/dict of site names
        :return: S_OK() when every name is in ``self._siteSet``; otherwise
                 S_ERROR naming the single site or the set of unknown sites
        """
        if isinstance(site, (list, set, dict)):
            # Collections: only the unknown names end up in the message.
            offending = set(site) - self._siteSet
            isValid = not offending
        else:
            offending = site
            isValid = site in self._siteSet

        if isValid:
            return S_OK()
        return S_ERROR('Specified site %s is not in list of defined sites' %
                       str(offending))

    #############################################################################
    def getServicePorts(self, setup='', printOutput=False):
        """Collect the configured ports of the services of a setup.

        If no setup is given it is taken from the current installation
        (``/DIRAC/Setup``).

        Example usage:

          >>> gLogger.notice(diracAdmin.getServicePorts())
          {'OK': True, 'Value':''}

        :param setup: setup name; must be a section of ``/DIRAC/Setups``
        :type setup: string
        :param printOutput: pretty-print the collected ports when True
        :type printOutput: boolean
        :return: S_OK( { 'System/Service' : port } ) or S_ERROR
        """
        if not setup:
            setup = gConfig.getValue('/DIRAC/Setup', '')

        knownSetups = gConfig.getSections('/DIRAC/Setups', [])
        if not knownSetups['OK']:
            return S_ERROR('Could not get /DIRAC/Setups sections')
        setupList = knownSetups['Value']
        if setup not in setupList:
            return S_ERROR('Setup %s is not in allowed list: %s' %
                           (setup, ', '.join(setupList)))

        setupOptions = gConfig.getOptionsDict('/DIRAC/Setups/%s' % setup)
        if not setupOptions['OK']:
            return S_ERROR('Could not get /DIRAC/Setups/%s options' % setup)
        serviceSetups = setupOptions['Value']  # dict

        systems = gConfig.getSections('/Systems')
        if not systems['OK']:
            return S_ERROR('Could not get Systems sections')

        ports = {}
        for system in systems['Value']:
            if system not in serviceSetups:
                self.log.warn('%s is not defined in /DIRAC/Setups/%s' %
                              (system, setup))
                continue

            path = '/Systems/%s/%s/Services' % (system, serviceSetups[system])
            sections = gConfig.getSections(path)
            if not sections['OK']:
                self.log.warn('Could not get sections in %s' % path)
                continue

            # An empty or missing value is treated as "no services".
            servicesList = sections['Value'] or []
            self.log.verbose('System: %s ServicesList: %s' %
                             (system, ', '.join(servicesList)))
            for service in servicesList:
                spath = '%s/%s/Port' % (path, service)
                servicePort = gConfig.getValue(spath, 0)
                if not servicePort:
                    self.log.warn('No port found for %s' % spath)
                    continue
                self.log.verbose('Found port for %s/%s = %s' %
                                 (system, service, servicePort))
                ports['%s/%s' % (system, service)] = servicePort

        if printOutput:
            gLogger.notice(self.pPrint.pformat(ports))

        return S_OK(ports)

    #############################################################################
    def getProxy(self, userDN, userGroup, validity=43200, limited=False):
        """Download a proxy through ``gProxyManager.downloadProxy``.

        Example usage:

          >>> gLogger.notice(diracAdmin.getProxy('some DN', 'dirac group'))
          {'OK': True, 'Value': }

        :param userDN: User DN
        :param userGroup: DIRAC Group
        :param validity: passed as ``requiredTimeLeft``; default 43200
                         (12 hours, per the original doc)
        :param limited: passed as the ``limited`` keyword
        :return: S_OK,S_ERROR
        """
        proxyResult = gProxyManager.downloadProxy(userDN,
                                                  userGroup,
                                                  requiredTimeLeft=validity,
                                                  limited=limited)
        return proxyResult

    #############################################################################
    def getVOMSProxy(self,
                     userDN,
                     userGroup,
                     vomsAttr=False,
                     validity=43200,
                     limited=False):
        """Download a VOMS proxy through ``gProxyManager.downloadVOMSProxy``.

        Example usage:

          >>> gLogger.notice(diracAdmin.getVOMSProxy('some DN', 'dirac group'))
          {'OK': True, 'Value': }

        :param userDN: User DN
        :param userGroup: DIRAC Group
        :param vomsAttr: passed as ``requiredVOMSAttribute``
        :param validity: passed as ``requiredTimeLeft``; default 43200
                         (12 hours, per the original doc)
        :param limited: passed as the ``limited`` keyword
        :return: S_OK,S_ERROR
        """
        proxyResult = gProxyManager.downloadVOMSProxy(
            userDN,
            userGroup,
            requiredTimeLeft=validity,
            requiredVOMSAttribute=vomsAttr,
            limited=limited)
        return proxyResult

    #############################################################################
    def getPilotProxy(self, userDN, userGroup, validity=43200):
        """Get a pilot proxy via ``gProxyManager.getPilotProxyFromDIRACGroup``.

        Example usage:

          >>> gLogger.notice(diracAdmin.getPilotProxy('some DN', 'dirac group'))
          {'OK': True, 'Value': }

        :param userDN: User DN
        :param userGroup: DIRAC Group
        :param validity: passed as ``requiredTimeLeft``; default 43200
                         (12 hours, per the original doc)
        :return: S_OK,S_ERROR
        """
        proxyResult = gProxyManager.getPilotProxyFromDIRACGroup(
            userDN, userGroup, requiredTimeLeft=validity)
        return proxyResult

    #############################################################################
    def resetJob(self, jobID):
        """Reset a job or list of jobs in the WMS.

        Per the original doc, this resets the reschedule counter for the
        job(s) and allows them to run as new.

        Example::

          >>> gLogger.notice(dirac.reset(12345))
          {'OK': True, 'Value': [12345]}

        :param jobID: JobID; strings and lists are converted to integers
                      first, anything else is passed on unchanged
        :type jobID: integer, string or list
        :return: S_OK,S_ERROR
        """
        if isinstance(jobID, six.string_types):
            try:
                jobID = int(jobID)
            except Exception as err:
                return self._errorReport(
                    str(err),
                    'Expected integer or convertible integer for existing jobID'
                )
        elif isinstance(jobID, list):
            try:
                jobID = list(map(int, jobID))
            except Exception as err:
                return self._errorReport(
                    str(err),
                    'Expected integer or convertible integer for existing jobIDs'
                )

        jobManager = JobManagerClient(useCertificates=False)
        return jobManager.resetJob(jobID)

    #############################################################################
    def getJobPilotOutput(self, jobID, directory=''):
        """Retrieve the pilot output for an existing job in the WMS.

        The output is written to ``<directory>/pilot_<jobID>/std.out`` and
        ``std.err``. ``directory`` defaults to the directory that was current
        when the API object was created.

          >>> gLogger.notice(dirac.getJobPilotOutput(12345))
          {'OK': True, StdOut:'',StdError:''}

        :param jobID: JobID
        :type jobID: integer or string
        :param directory: existing directory in which the output directory
                          is created
        :type directory: string
        :return: the WMS result on success; an error result when the
                 directory does not exist, the WMS call fails or the output
                 directory already exists
        """
        if not directory:
            directory = self.currentDir

        if not os.path.exists(directory):
            return self._errorReport('Directory %s does not exist' % directory)

        result = WMSAdministratorClient().getJobPilotOutput(jobID)
        if not result['OK']:
            return result

        outputPath = '%s/pilot_%s' % (directory, jobID)
        # Never overwrite an earlier retrieval: the user has to clean up.
        if os.path.exists(outputPath):
            self.log.info('Remove %s and retry to continue' % outputPath)
            return S_ERROR('Remove %s and retry to continue' % outputPath)

        # The early return above means the path cannot exist here, so the
        # former second existence check was dead code.
        self.log.verbose('Creating directory %s' % outputPath)
        os.mkdir(outputPath)

        outputs = result['Value']
        # (key in the WMS answer, file name, word used in the log messages)
        streams = (('StdOut', 'std.out', 'output'),
                   ('StdError', 'std.err', 'error'))
        for key, fileName, label in streams:
            if key not in outputs:
                self.log.warn('No standard %s returned' % label)
                continue
            target = '%s/%s' % (outputPath, fileName)
            with open(target, 'w') as fopen:
                fopen.write(outputs[key])
            self.log.verbose('Standard %s written to %s' % (label, target))

        self.log.always('Outputs retrieved in %s' % outputPath)
        return result

    #############################################################################
    def getPilotOutput(self, gridReference, directory=''):
        """Retrieve the pilot output (std.out and std.err) for a pilot reference.

        The output is requested from the PilotManager client and written to a
        new ``pilot_NAME`` directory created inside ``directory``, where NAME
        is the part of the reference after its last ``/`` (or ``reference``
        when that part is empty). When ``directory`` is empty, the working
        directory recorded at API initialization is used. The call fails if
        ``directory`` does not exist or if the output directory already exists.

          >>> gLogger.notice(dirac.getPilotOutput('https://some.host:9000/abcdef'))
          {'OK': True, 'Value': {}}

        :param gridReference: Pilot Job Reference
        :type gridReference: string
        :param directory: existing directory that will hold the output directory
        :type directory: string
        :return: S_OK,S_ERROR
        """
        if not isinstance(gridReference, six.string_types):
            return self._errorReport('Expected string for pilot reference')

        baseDir = directory if directory else self.currentDir
        if not os.path.exists(baseDir):
            return self._errorReport('Directory %s does not exist' % baseDir)

        result = PilotManagerClient().getPilotOutput(gridReference)
        if not result['OK']:
            return result

        # Name the output directory after the last component of the reference
        shortReference = gridReference.split('/')[-1] or 'reference'
        outputPath = '%s/pilot_%s' % (baseDir, shortReference)

        if os.path.exists(outputPath):
            # Never overwrite a previous retrieval
            self.log.info('Remove %s and retry to continue' % outputPath)
            return S_ERROR('Remove %s and retry to continue' % outputPath)

        # Existence is checked again right before the directory is created
        if not os.path.exists(outputPath):
            self.log.verbose('Creating directory %s' % outputPath)
            os.mkdir(outputPath)

        outputs = result['Value']

        if 'StdOut' not in outputs:
            self.log.warn('No standard output returned')
        else:
            stdoutPath = '%s/std.out' % (outputPath)
            with open(stdoutPath, 'w') as outFile:
                outFile.write(outputs['StdOut'])
            self.log.info('Standard output written to %s' % (stdoutPath))

        if 'StdErr' not in outputs:
            self.log.warn('No standard error returned')
        else:
            stderrPath = '%s/std.err' % (outputPath)
            with open(stderrPath, 'w') as errFile:
                errFile.write(outputs['StdErr'])
            self.log.info('Standard error written to %s' % (stderrPath))

        self.log.always('Outputs retrieved in %s' % outputPath)
        return result

    #############################################################################
    def getPilotInfo(self, gridReference):
        """Retrieve info relative to a pilot reference.

        The reference must be a string; it is forwarded to the PilotManager
        client and the client result is returned unchanged.

          >>> gLogger.notice(dirac.getPilotInfo('https://some.host:9000/abcdef'))
          {'OK': True, 'Value': {}}

        :param gridReference: Pilot Job Reference
        :type gridReference: string
        :return: S_OK,S_ERROR
        """
        if isinstance(gridReference, six.string_types):
            return PilotManagerClient().getPilotInfo(gridReference)

        return self._errorReport('Expected string for pilot reference')

    #############################################################################
    def killPilot(self, gridReference):
        """Kill the pilot specified.

        The reference must be a string; it is forwarded to the PilotManager
        client and the client result is returned unchanged.

          >>> gLogger.notice(dirac.killPilot('https://some.host:9000/abcdef'))
          {'OK': True, 'Value': {}}

        :param gridReference: Pilot Job Reference
        :type gridReference: string
        :return: S_OK,S_ERROR
        """
        if isinstance(gridReference, six.string_types):
            return PilotManagerClient().killPilot(gridReference)

        return self._errorReport('Expected string for pilot reference')

    #############################################################################
    def getPilotLoggingInfo(self, gridReference):
        """Retrieve the pilot logging info for a pilot reference.

        The reference must be a string; it is forwarded to the PilotManager
        client and the client result is returned unchanged.

          >>> gLogger.notice(dirac.getPilotLoggingInfo('https://some.host:9000/abcdef'))
          {'OK': True, 'Value': {"The output of the command"}}

        :param gridReference: Grid pilot job reference Id
        :type gridReference: string
        :return: S_OK,S_ERROR
        """
        referenceIsString = isinstance(gridReference, six.string_types)
        if not referenceIsString:
            return self._errorReport('Expected string for pilot reference')

        loggingInfo = PilotManagerClient().getPilotLoggingInfo(gridReference)
        return loggingInfo

    #############################################################################
    def getJobPilots(self, jobID):
        """Extract the list of submitted pilots and their status for a given
        jobID from the WMS.

        A string jobID is converted to an integer first. On success the
        returned value is also pretty-printed through gLogger.notice.

          >>> gLogger.notice(dirac.getJobPilots(12345))
          {'OK': True, 'Value': {PilotID:{StatusDict}}}

        :param jobID: JobID
        :type jobID: integer or string
        :return: S_OK,S_ERROR
        """
        if isinstance(jobID, six.string_types):
            try:
                jobID = int(jobID)
            except Exception as err:
                return self._errorReport(
                    str(err), 'Expected integer or string for existing jobID')

        pilots = PilotManagerClient().getPilots(jobID)
        if not pilots['OK']:
            return pilots

        gLogger.notice(self.pPrint.pformat(pilots['Value']))
        return pilots

    #############################################################################
    def getPilotSummary(self, startDate='', endDate=''):
        """Retrieve the pilot status summary per CE and print it as a table.

        The summary is requested from the PilotManager client. A header line
        and one line per CE (its states in sorted order, each followed by its
        count) are printed through gLogger.notice; the full client result is
        returned unchanged.

          >>> gLogger.notice(dirac.getPilotSummary())
          {'OK': True, 'Value': {CE:{Status:Count}}}

        :param startDate: forwarded as is to the PilotManager client
                          (presumably the start of the period to summarize -- to be confirmed)
        :type startDate: string
        :param endDate: forwarded as is to the PilotManager client
                        (presumably the end of the period to summarize -- to be confirmed)
        :type endDate: string
        :return: S_OK,S_ERROR
        """
        result = PilotManagerClient().getPilotSummary(startDate, endDate)
        if not result['OK']:
            return result

        ceDict = result['Value']

        # The header needs one Status/Count column pair per state of the CE
        # reporting the most states (none when the summary is empty).
        # items()/values() are used so that the code runs on Python 2 and 3.
        maxStates = max([len(summary) for summary in ceDict.values()] or [0])
        headers = 'CE'.ljust(28)
        headers += ('Status'.ljust(12) + 'Count'.ljust(12)) * maxStates
        gLogger.notice(headers)

        for ce, summary in ceDict.items():
            line = ce.ljust(28)
            for state in sorted(summary):
                line += state.ljust(12) + str(summary[state]).ljust(12)
            gLogger.notice(line)

        return result

    #############################################################################
    def setSiteProtocols(self, site, protocolsList, printOutput=False):
        """Set the protocols list of the SRM2 access sections of every SE of a site.

        The SEs are read from the ``SE`` option of the site section in the CS
        (the first dot-separated component of the site name is used as the
        parent section). Every requested protocol must be part of
        ``/Resources/StorageElements/DefaultProtocols``. The user is prompted
        for confirmation before any change; changes are committed to the CS
        only if at least one option was set.

        :param site: site name
        :type site: string
        :param protocolsList: protocols to set
        :type protocolsList: list of strings
        :param printOutput: when True, the outcome is reported through gLogger.notice
        :type printOutput: boolean
        :return: S_OK,S_ERROR
        """
        result = self.__checkSiteIsValid(site)
        if not result['OK']:
            return result

        siteSection = '/Resources/Sites/%s/%s/SE' % (site.split('.')[0], site)
        siteSEs = gConfig.getValue(siteSection, [])
        if not siteSEs:
            return S_ERROR('No SEs found for site %s in section %s' %
                           (site, siteSection))

        defaultProtocols = gConfig.getValue(
            '/Resources/StorageElements/DefaultProtocols', [])
        self.log.verbose('Default list of protocols are',
                         ', '.join(defaultProtocols))

        # Refuse the whole request as soon as one protocol is not a default one
        for protocol in protocolsList:
            if protocol in defaultProtocols:
                continue
            return S_ERROR(
                'Requested to set protocol %s in list but %s is not '
                'in default list of protocols:\n%s' %
                (protocol, protocol, ', '.join(defaultProtocols)))

        requested = ', '.join(protocolsList)
        result = promptUser(
            'Do you want to add the following default protocols:'
            ' %s for SE(s):\n%s' % (requested, ', '.join(siteSEs)))
        if not result['OK']:
            return result
        if result['Value'].lower() != 'y':
            self.log.always('No protocols will be added')
            return S_OK()

        modifiedCS = False
        for se in siteSEs:
            sections = gConfig.getSections('/Resources/StorageElements/%s/' %
                                           (se))
            if not sections['OK']:
                return sections

            for section in sections['Value']:
                protocolName = gConfig.getValue(
                    '/Resources/StorageElements/%s/%s/ProtocolName' %
                    (se, section), '')
                if protocolName != 'SRM2':
                    # Only SRM2 sections carry a protocols list
                    continue

                path = '/Resources/StorageElements/%s/%s/ProtocolsList' % (
                    se, section)
                self.log.verbose('Setting %s to %s' % (path, requested))
                result = self.csSetOption(path, requested)
                if not result['OK']:
                    return result
                modifiedCS = True

        if not modifiedCS:
            if printOutput:
                gLogger.notice('No modifications to CS required')
            return S_OK()

        result = self.csCommitChanges(False)
        if not result['OK']:
            return S_ERROR('CS Commit failed with message = %s' %
                           (result['Message']))
        if printOutput:
            gLogger.notice('Successfully committed changes to CS')

        return S_OK()

    #############################################################################
    def csSetOption(self, optionPath, optionValue):
        """Set the value of an option in the CS.

        Thin wrapper around ``setOption`` of the CS API helper.

        :param optionPath: path of the option
        :param optionValue: value to set
        :return: the result of the CS API call
        """
        outcome = self.csAPI.setOption(optionPath, optionValue)
        return outcome

    #############################################################################
    def csSetOptionComment(self, optionPath, comment):
        """Set the comment of an option in the CS.

        Thin wrapper around ``setOptionComment`` of the CS API helper.

        :param optionPath: path of the option
        :param comment: comment to attach
        :return: the result of the CS API call
        """
        outcome = self.csAPI.setOptionComment(optionPath, comment)
        return outcome

    #############################################################################
    def csModifyValue(self, optionPath, newValue):
        """Modify an existing value in the CS.

        Thin wrapper around ``modifyValue`` of the CS API helper.

        :param optionPath: path of the option
        :param newValue: new value of the option
        :return: the result of the CS API call
        """
        outcome = self.csAPI.modifyValue(optionPath, newValue)
        return outcome

    #############################################################################
    def csRegisterUser(self, username, properties):
        """Register a user in the CS.

        Thin wrapper around ``addUser`` of the CS API helper.

        :param username: username of the user
        :param properties: dict containing:

            - DN
            - groups : list/tuple of groups the user belongs to
            - others : more properties of the user, like mail
        :return: the result of the CS API call
        """
        outcome = self.csAPI.addUser(username, properties)
        return outcome

    #############################################################################
    def csDeleteUser(self, user):
        """Delete a user from the CS. Can take a list of users.

        Thin wrapper around ``deleteUsers`` of the CS API helper.

        :param user: user name or list of user names
        :return: the result of the CS API call
        """
        outcome = self.csAPI.deleteUsers(user)
        return outcome

    #############################################################################
    def csModifyUser(self, username, properties, createIfNonExistant=False):
        """Modify a user in the CS.

        Takes the same parameters as the user registration and applies the
        changes. Thin wrapper around ``modifyUser`` of the CS API helper.

        :param username: username of the user
        :param properties: dict of user properties
        :param createIfNonExistant: forwarded as is to the CS API helper
        :return: the result of the CS API call
        """
        outcome = self.csAPI.modifyUser(username, properties,
                                        createIfNonExistant)
        return outcome

    #############################################################################
    def csListUsers(self, group=False):
        """List the users in the CS. If no group is specified return all users.

        Thin wrapper around ``listUsers`` of the CS API helper.

        :param group: group to restrict the listing to (False for no restriction)
        :return: the result of the CS API call
        """
        outcome = self.csAPI.listUsers(group)
        return outcome

    #############################################################################
    def csDescribeUsers(self, mask=False):
        """List users and their properties in the CS.

        If a mask is given, only users in the mask will be returned.
        Thin wrapper around ``describeUsers`` of the CS API helper.

        :param mask: users to restrict the description to (False for no restriction)
        :return: the result of the CS API call
        """
        outcome = self.csAPI.describeUsers(mask)
        return outcome

    #############################################################################
    def csModifyGroup(self, groupname, properties, createIfNonExistant=False):
        """Modify a group in the CS.

        Takes the same parameters as the group registration and applies the
        changes. Thin wrapper around ``modifyGroup`` of the CS API helper.

        :param groupname: name of the group
        :param properties: dict of group properties
        :param createIfNonExistant: forwarded as is to the CS API helper
        :return: the result of the CS API call
        """
        outcome = self.csAPI.modifyGroup(groupname, properties,
                                         createIfNonExistant)
        return outcome

    #############################################################################
    def csListHosts(self):
        """List the hosts in the CS.

        Thin wrapper around ``listHosts`` of the CS API helper.

        :return: the result of the CS API call
        """
        outcome = self.csAPI.listHosts()
        return outcome

    #############################################################################
    def csDescribeHosts(self, mask=False):
        """Get extended info for the hosts in the CS.

        Thin wrapper around ``describeHosts`` of the CS API helper.

        :param mask: forwarded as is to the CS API helper
        :return: the result of the CS API call
        """
        outcome = self.csAPI.describeHosts(mask)
        return outcome

    #############################################################################
    def csModifyHost(self, hostname, properties, createIfNonExistant=False):
        """Modify a host in the CS.

        Takes the same parameters as the host registration and applies the
        changes. Thin wrapper around ``modifyHost`` of the CS API helper.

        :param hostname: name of the host
        :param properties: dict of host properties
        :param createIfNonExistant: forwarded as is to the CS API helper
        :return: the result of the CS API call
        """
        outcome = self.csAPI.modifyHost(hostname, properties,
                                        createIfNonExistant)
        return outcome

    #############################################################################
    def csListGroups(self):
        """List the groups in the CS.

        Thin wrapper around ``listGroups`` of the CS API helper.

        :return: the result of the CS API call
        """
        outcome = self.csAPI.listGroups()
        return outcome

    #############################################################################
    def csDescribeGroups(self, mask=False):
        """List groups and their properties in the CS.

        If a mask is given, only groups in the mask will be returned.
        Thin wrapper around ``describeGroups`` of the CS API helper.

        :param mask: groups to restrict the description to (False for no restriction)
        :return: the result of the CS API call
        """
        outcome = self.csAPI.describeGroups(mask)
        return outcome

    #############################################################################
    def csSyncUsersWithCFG(self, usersCFG):
        """Synchronize the users in the CS with the contents of a cfg.

        Thin wrapper around ``syncUsersWithCFG`` of the CS API helper.

        :param usersCFG: cfg holding the users, forwarded as is
        :return: the result of the CS API call
        """
        outcome = self.csAPI.syncUsersWithCFG(usersCFG)
        return outcome

    #############################################################################
    def csCommitChanges(self, sortUsers=True):
        """Commit the changes in the CS.

        Thin wrapper around ``commitChanges`` of the CS API helper.

        :param sortUsers: forwarded to the CS API helper as its ``sortUsers``
                          keyword argument
        :type sortUsers: boolean
        :return: the result of the CS API call
        """
        # Honour the caller's choice instead of always forwarding False
        return self.csAPI.commitChanges(sortUsers=sortUsers)

    #############################################################################
    def sendMail(self, address, subject, body,
                 fromAddress=None, localAttempt=True, html=False):
        """Send mail to specified address with body.

        All arguments are forwarded positionally, in the same order, to
        ``sendMail`` of a freshly created NotificationClient.

        :param address: destination address
        :param subject: subject of the mail
        :param body: body of the mail
        :param fromAddress: sender address
        :param localAttempt: forwarded as is to the NotificationClient
        :param html: forwarded as is to the NotificationClient
        :return: the result of the NotificationClient call
        """
        mailer = NotificationClient()
        outcome = mailer.sendMail(address, subject, body, fromAddress,
                                  localAttempt, html)
        return outcome

    #############################################################################
    def sendSMS(self, userName, body, fromAddress=None):
        """Send an SMS with the given body to a user.

        Bodies longer than 160 characters are rejected before any
        NotificationClient is created.

        :param userName: user to send the SMS to
        :param body: text of the SMS, at most 160 characters
        :param fromAddress: forwarded as is to the NotificationClient
        :return: S_ERROR if the body is too long, otherwise the result of the
                 NotificationClient call
        """
        if len(body) <= 160:
            return NotificationClient().sendSMS(userName, body, fromAddress)

        return S_ERROR('Exceeded maximum SMS length of 160 characters')

    #############################################################################
    def getBDIISite(self, site, host=None):
        """Get information about site from BDII at host.

        Thin wrapper around ``ldapSite``.

        :param site: site name
        :param host: BDII host, forwarded as the ``host`` keyword argument
        :return: the result of the ``ldapSite`` call
        """
        siteInfo = ldapSite(site, host=host)
        return siteInfo

    #############################################################################
    def getBDIICluster(self, ce, host=None):
        """Get information about the cluster of a ce from BDII at host.

        Thin wrapper around ``ldapCluster``.

        :param ce: CE name
        :param host: BDII host, forwarded as the ``host`` keyword argument
        :return: the result of the ``ldapCluster`` call
        """
        clusterInfo = ldapCluster(ce, host=host)
        return clusterInfo

    #############################################################################
    def getBDIICE(self, ce, host=None):
        """Get information about ce from BDII at host.

        Thin wrapper around ``ldapCE``.

        :param ce: CE name
        :param host: BDII host, forwarded as the ``host`` keyword argument
        :return: the result of the ``ldapCE`` call
        """
        ceInfo = ldapCE(ce, host=host)
        return ceInfo

    #############################################################################
    def getBDIIService(self, ce, host=None):
        """Get information about the service of a ce from BDII at host.

        Thin wrapper around ``ldapService``.

        :param ce: CE name
        :param host: BDII host, forwarded as the ``host`` keyword argument
        :return: the result of the ``ldapService`` call
        """
        serviceInfo = ldapService(ce, host=host)
        return serviceInfo

    #############################################################################
    def getBDIICEState(self, ce, useVO=voName, host=None):
        """Get information about ce state from BDII at host.

        Thin wrapper around ``ldapCEState``.

        :param ce: CE name
        :param useVO: VO to query for, defaults to the module level ``voName``
        :param host: BDII host, forwarded as the ``host`` keyword argument
        :return: the result of the ``ldapCEState`` call
        """
        ceState = ldapCEState(ce, useVO, host=host)
        return ceState

    #############################################################################
    def getBDIICEVOView(self, ce, useVO=voName, host=None):
        """Get information about ce voview from BDII at host.

        Thin wrapper around ``ldapCEVOView``.

        :param ce: CE name
        :param useVO: VO to query for, defaults to the module level ``voName``
        :param host: BDII host, forwarded as the ``host`` keyword argument
        :return: the result of the ``ldapCEVOView`` call
        """
        voView = ldapCEVOView(ce, useVO, host=host)
        return voView
示例#4
0
class PilotDirector( object ):
  """
    Base Pilot Director class.

    For a given TaskQueue it resolves the candidate CEs, builds the pilot options,
    retrieves a pilot proxy and delegates the actual submission to the backend
    specific _submitPilots method.

    Derived classes must implement:

      * __init__( self, submitPool ):
          that must call the parent class __init__ method and then do its own initialization
      * configure( self, csSection, submitPool ):
          that must call the parent class configure method and then do its own configuration
      * _submitPilots( self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit, ceMask,
                       submitPrivatePilot, privateTQ, proxy, pilotsPerJob )
      * _submitPilot( self, proxy, pilotsToSubmit, jdl, taskQueueID, rb )
      * _listMatch( self, proxy, jdl, taskQueueID, rb )
      * _getChildrenReferences( self, proxy, parentReference, taskQueueID )


    Derived classes might implement:

      * configureFromSection( self, mySection ):
          to reload from a CS section the additional datamembers they might have defined.

    If additional datamembers are defined, they must:

      - be declared in the __init__
      - be reconfigured in the configureFromSection method by executing
        self.reloadConfiguration( csSection, submitPool ) in their configure method
  """
  # Name of the grid middleware handled by the director, set by derived classes
  gridMiddleware = ''

  def __init__( self, submitPool ):
    """
     Define the logger and some defaults

     :param submitPool: name of the SubmitPool served by this director
    """

    # The sub logger name only carries the SubmitPool when it differs from the middleware name
    if submitPool == self.gridMiddleware:
      self.log = gLogger.getSubLogger( '%sPilotDirector' % self.gridMiddleware )
    else:
      self.log = gLogger.getSubLogger( '%sPilotDirector/%s' % ( self.gridMiddleware, submitPool ) )

    self.pilot = DIRAC_PILOT
    self.submitPoolOption = '-o /Resources/Computing/CEDefaults/SubmitPool=%s' % submitPool
    self.extraPilotOptions = []
    self.installVersion = DIRAC_VERSION
    self.installProject = DIRAC_PROJECT
    self.installation = DIRAC_INSTALLATION
    self.pilotExtensionsList = []

    self.virtualOrganization = VIRTUAL_ORGANIZATION
    self.install = DIRAC_INSTALL
    self.extraModules = DIRAC_MODULES
    self.maxJobsInFillMode = MAX_JOBS_IN_FILLMODE
    self.targetGrids = [ self.gridMiddleware ]


    self.enableListMatch = ENABLE_LISTMATCH
    self.listMatchDelay = LISTMATCH_DELAY
    self.listMatchCache = DictCache()

    self.privatePilotFraction = PRIVATE_PILOT_FRACTION

    self.errorClearTime = ERROR_CLEAR_TIME
    self.errorTicketTime = ERROR_TICKET_TIME
    self.errorMailAddress = DIRAC.errorMail
    self.alarmMailAddress = DIRAC.alarmMail
    self.mailFromAddress = FROM_MAIL

    self.siteClient = SiteStatus()

    # Safety net only: the logger is always defined at the top of this method
    if 'log' not in self.__dict__:
      self.log = gLogger.getSubLogger( 'PilotDirector' )
    self.log.info( 'Initialized' )

  def configure( self, csSection, submitPool ):
    """
     Here goes common configuration for all PilotDirectors

     The members are loaded from csSection, then from its middleware and SubmitPool
     subsections, and finally the installation defaults are read from the Operations
     helper. The resulting configuration is logged.

     :param csSection: CS section holding the director configuration
     :param submitPool: name of the SubmitPool served by this director
    """
    self.configureFromSection( csSection )
    self.reloadConfiguration( csSection, submitPool )

    # Get the defaults for the Setup where the Director is running
    opsHelper = Operations()
    # The version option can be a list, the first element is used
    self.installVersion = opsHelper.getValue( cfgPath( 'Pilot', 'Version' ), [ self.installVersion ] )[0]
    self.installProject = opsHelper.getValue( cfgPath( 'Pilot', 'Project' ), self.installProject )
    self.installation = opsHelper.getValue( cfgPath( 'Pilot', 'Installation' ), self.installation )
    self.pilotExtensionsList = opsHelper.getValue( "Pilot/Extensions", self.pilotExtensionsList )

    self.log.info( '===============================================' )
    self.log.info( 'Configuration:' )
    self.log.info( '' )
    self.log.info( ' Target Grids:   ', ', '.join( self.targetGrids ) )
    self.log.info( ' Install script: ', self.install )
    self.log.info( ' Pilot script:   ', self.pilot )
    self.log.info( ' Pilot modules', self.extraModules )
    self.log.info( ' Install Ver:    ', self.installVersion )
    if self.installProject:
      self.log.info( ' Project:        ', self.installProject )
    if self.installation:
      self.log.info( ' Installation:   ', self.installation )
    if self.extraPilotOptions:
      self.log.info( ' Extra Options:   ', ' '.join( self.extraPilotOptions ) )
    self.log.info( ' ListMatch:      ', self.enableListMatch )
    self.log.info( ' Private %:      ', self.privatePilotFraction * 100 )
    if self.enableListMatch:
      self.log.info( ' ListMatch Delay:', self.listMatchDelay )
    self.listMatchCache.purgeExpired()

  def reloadConfiguration( self, csSection, submitPool ):
    """
     Common Configuration can be overwritten for each GridMiddleware
     and then again for each SubmitPool

     :param csSection: CS section holding the director configuration
     :param submitPool: name of the SubmitPool subsection
    """
    mySection = csSection + '/' + self.gridMiddleware
    self.configureFromSection( mySection )

    # And Again for each SubmitPool
    mySection = csSection + '/' + submitPool
    self.configureFromSection( mySection )

  def configureFromSection( self, mySection ):
    """
      reload from CS

      Every member keeps its current value when the corresponding option is not
      defined in mySection.

      :param mySection: CS section to read the options from
    """
    self.pilot = gConfig.getValue( mySection + '/PilotScript'          , self.pilot )
    self.installVersion = gConfig.getValue( mySection + '/Version'         , self.installVersion )
    self.extraPilotOptions = gConfig.getValue( mySection + '/ExtraPilotOptions'    , self.extraPilotOptions )
    self.install = gConfig.getValue( mySection + '/InstallScript'        , self.install )
    # Modules found in this section are prepended to the ones already known
    self.extraModules = gConfig.getValue( mySection + '/ExtraPilotModules'        , [] ) + self.extraModules
    self.installProject = gConfig.getValue( mySection + '/Project'        , self.installProject )
    self.installation = gConfig.getValue( mySection + '/Installation'        , self.installation )
    self.maxJobsInFillMode = gConfig.getValue( mySection + '/MaxJobsInFillMode'    , self.maxJobsInFillMode )
    self.targetGrids = gConfig.getValue( mySection + '/TargetGrids'    , self.targetGrids )

    self.enableListMatch = gConfig.getValue( mySection + '/EnableListMatch'      , self.enableListMatch )
    self.listMatchDelay = gConfig.getValue( mySection + '/ListMatchDelay'       , self.listMatchDelay )
    self.errorClearTime = gConfig.getValue( mySection + '/ErrorClearTime'       , self.errorClearTime )
    self.errorTicketTime = gConfig.getValue( mySection + '/ErrorTicketTime'      , self.errorTicketTime )
    self.errorMailAddress = gConfig.getValue( mySection + '/ErrorMailAddress'     , self.errorMailAddress )
    self.alarmMailAddress = gConfig.getValue( mySection + '/AlarmMailAddress'     , self.alarmMailAddress )
    self.mailFromAddress = gConfig.getValue( mySection + '/MailFromAddress'      , self.mailFromAddress )
    self.privatePilotFraction = gConfig.getValue( mySection + '/PrivatePilotFraction' , self.privatePilotFraction )

    # VO resolution order: section option, getVOForGroup result, current value
    virtualOrganization = gConfig.getValue( mySection + '/VirtualOrganization' , '' )
    if not virtualOrganization:
      # NOTE(review): presumably a non existing group makes getVOForGroup return
      # the installation default VO -- to be confirmed
      virtualOrganization = getVOForGroup( 'NonExistingGroup' )
      if not virtualOrganization:
        virtualOrganization = self.virtualOrganization
    self.virtualOrganization = virtualOrganization

  def _resolveCECandidates( self, taskQueueDict ):
    """
      Return a list of CEs for this TaskQueue

      CEs explicitly requested by the TaskQueue ( GridCEs ) are returned as they are.
      Otherwise the sites returned by the site status client are filtered with the
      BannedSites and Sites of the TaskQueue, and the CEs of the remaining sites whose
      SubmissionMode matches the middleware of this director are returned.

      :param taskQueueDict: TaskQueue description
      :return: list of CE names, empty if no candidate is found
    """
    # assume user knows what they're doing and avoid site mask e.g. sam jobs
    if 'GridCEs' in taskQueueDict and taskQueueDict['GridCEs']:
      self.log.info( 'CEs requested by TaskQueue %s:' % taskQueueDict['TaskQueueID'],
                     ', '.join( taskQueueDict['GridCEs'] ) )
      return taskQueueDict['GridCEs']

    # Get the mask
    ret = self.siteClient.getSites()
    if not ret['OK']:
      self.log.error( 'Can not retrieve site Mask from DB:', ret['Message'] )
      return []

    siteMask = ret['Value']
    if not siteMask:
      self.log.error( 'Site mask is empty' )
      return []

    self.log.verbose( 'Site Mask: %s' % ', '.join( siteMask ) )

    # remove banned sites from siteMask
    if 'BannedSites' in taskQueueDict:
      for site in taskQueueDict['BannedSites']:
        if site in siteMask:
          siteMask.remove( site )
          self.log.verbose( 'Removing banned site %s from site Mask' % site )

    # remove from the mask if a Site is given
    siteMask = [ site for site in siteMask if 'Sites' not in taskQueueDict or site in taskQueueDict['Sites'] ]

    if not siteMask:
      # pilot can not be submitted
      self.log.info( 'No Valid Site Candidate in Mask for TaskQueue %s' % taskQueueDict['TaskQueueID'] )
      return []

    self.log.info( 'Site Candidates for TaskQueue %s:' % taskQueueDict['TaskQueueID'], ', '.join( siteMask ) )

    # Get CE's associates to the given site Names
    ceMask = []

    for grid in self.targetGrids:

      section = '/Resources/Sites/%s' % grid
      ret = gConfig.getSections( section )
      if not ret['OK']:
        # this is hack, maintained until LCG is added as TargetGrid for the gLite SubmitPool
        section = '/Resources/Sites/LCG'
        ret = gConfig.getSections( section )

      if not ret['OK']:
        self.log.error( 'Could not obtain CEs from CS', ret['Message'] )
        continue

      gridSites = ret['Value']
      for siteName in gridSites:
        if siteName in siteMask:
          ret = gConfig.getValue( '%s/%s/CE' % ( section, siteName ), [] )
          for ce in ret:
            # CEs without an explicit SubmissionMode are considered gLite ones
            submissionMode = gConfig.getValue( '%s/%s/CEs/%s/SubmissionMode' % ( section, siteName, ce ), 'gLite' )
            if submissionMode == self.gridMiddleware and ce not in ceMask:
              ceMask.append( ce )

    if not ceMask:
      self.log.info( 'No CE Candidate found for TaskQueue %s:' % taskQueueDict['TaskQueueID'], ', '.join( siteMask ) )

    self.log.verbose( 'CE Candidates for TaskQueue %s:' % taskQueueDict['TaskQueueID'], ', '.join( ceMask ) )

    return ceMask

  def _getPilotOptions( self, taskQueueDict, pilotsToSubmit ):
    """
      Build the list of options to be passed to the pilot script

      :param taskQueueDict: TaskQueue description
      :param pilotsToSubmit: number of pilots requested
      :return: S_OK( ( pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ ) )
               or S_ERROR if the generic pilot credentials or the proxy token can not be obtained
    """

    # Need to limit the maximum number of pilots to submit at once
    # For generic pilots this is limited by the number of use of the tokens and the
    # maximum number of jobs in Filling mode, but for private Jobs we need an extra limitation:
    pilotsToSubmit = max( min( pilotsToSubmit, int( 50 / self.maxJobsInFillMode ) ), 1 )
    pilotOptions = []
    privateIfGenericTQ = self.privatePilotFraction > random.random()
    privateTQ = ( 'PilotTypes' in taskQueueDict and 'private' in [ t.lower() for t in taskQueueDict['PilotTypes'] ] )
    forceGeneric = 'ForceGeneric' in taskQueueDict
    submitPrivatePilot = ( privateIfGenericTQ or privateTQ ) and not forceGeneric
    if submitPrivatePilot:
      self.log.verbose( 'Submitting private pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'] )
      ownerDN = taskQueueDict['OwnerDN']
      ownerGroup = taskQueueDict['OwnerGroup']
      # User Group requirement
      pilotOptions.append( '-G %s' % taskQueueDict['OwnerGroup'] )
      # check if group allows jobsharing
      ownerGroupProperties = getPropertiesForGroup( ownerGroup )
      if 'JobSharing' not in ownerGroupProperties:
        # Add Owner requirement to pilot
        pilotOptions.append( "-O '%s'" % ownerDN )
      if privateTQ:
        pilotOptions.append( '-o /Resources/Computing/CEDefaults/PilotType=private' )
      maxJobsInFillMode = self.maxJobsInFillMode
    else:
      #For generic jobs we'll submit mixture of generic and private pilots
      self.log.verbose( 'Submitting generic pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'] )
      #ADRI: Find the generic group
      result = findGenericPilotCredentials( group = taskQueueDict[ 'OwnerGroup' ] )
      if not result[ 'OK' ]:
        self.log.error( ERROR_GENERIC_CREDENTIALS, result[ 'Message' ] )
        return S_ERROR( ERROR_GENERIC_CREDENTIALS )
      ownerDN, ownerGroup = result[ 'Value' ]

      result = gProxyManager.requestToken( ownerDN, ownerGroup, max( pilotsToSubmit, self.maxJobsInFillMode ) )
      if not result[ 'OK' ]:
        self.log.error( ERROR_TOKEN, result['Message'] )
        return S_ERROR( ERROR_TOKEN )
      ( token, numberOfUses ) = result[ 'Value' ]
      pilotsToSubmit = min( numberOfUses, pilotsToSubmit )

      pilotOptions.append( '-o /Security/ProxyToken=%s' % token )

      # Floor division keeps the number of pilots an integer with any python version
      pilotsToSubmit = max( 1, ( pilotsToSubmit - 1 ) // self.maxJobsInFillMode + 1 )

      maxJobsInFillMode = int( numberOfUses / pilotsToSubmit )
    # Use Filling mode
    pilotOptions.append( '-M %s' % maxJobsInFillMode )

    # Debug
    pilotOptions.append( '-d' )
    # Setup.
    pilotOptions.append( '-S %s' % taskQueueDict['Setup'] )
    # CS Servers
    csServers = gConfig.getServersList()
    if len( csServers ) > 3:
      # Remove the master
      master = gConfigurationData.getMasterServer()
      if master in csServers:
        csServers.remove( master )
    pilotOptions.append( '-C %s' % ",".join( csServers ) )
    # DIRAC Extensions to be used in pilots
    # ubeda: I'm not entirely sure if we can use here the same opsHelper as the one
    # created further down in this method
    pilotExtensionsList = Operations().getValue( "Pilot/Extensions", [] )
    extensionsList = []
    if pilotExtensionsList:
      # A first element set to 'None' means that no extension must be passed
      if pilotExtensionsList[0] != 'None':
        extensionsList = pilotExtensionsList
    else:
      extensionsList = getCSExtensions()
    if extensionsList:
      pilotOptions.append( '-e %s' % ",".join( extensionsList ) )

    #Get DIRAC version and project, There might be global Setup defaults and per VO/Setup defaults (from configure)
    opsHelper = Operations( group = taskQueueDict['OwnerGroup'], setup = taskQueueDict['Setup'] )
    # Requested version of DIRAC (it can be a list, so we take the first one)
    version = opsHelper.getValue( cfgPath( 'Pilot', 'Version' ) , [ self.installVersion ] )[0]
    pilotOptions.append( '-r %s' % version )
    # Requested Project to install
    installProject = opsHelper.getValue( cfgPath( 'Pilot', 'Project' ) , self.installProject )
    if installProject:
      pilotOptions.append( '-l %s' % installProject )
    installation = opsHelper.getValue( cfgPath( 'Pilot', 'Installation' ), self.installation )
    if installation:
      pilotOptions.append( "-V %s" % installation )
    # Requested CPU time
    pilotOptions.append( '-T %s' % taskQueueDict['CPUTime'] )

    if self.submitPoolOption not in self.extraPilotOptions:
      pilotOptions.append( self.submitPoolOption )

    if self.extraPilotOptions:
      pilotOptions.extend( self.extraPilotOptions )

    return S_OK( ( pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ ) )

  def _submitPilots( self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit,
                     ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob ):
    """
      This method must be implemented on the Backend specific derived class.
      This is problem with the Director, not with the Job so we must return S_OK
      The base implementation only logs an error.
    """
    self.log.error( '_submitPilots method not implemented' )
    return S_OK()

  def _submitPilot( self, proxy, pilotsToSubmit, jdl, taskQueueID, rb ):
    """ Submit pilot and get back the reference
        This method must be implemented on the Backend specific derived class,
        the base implementation only logs an error and returns S_OK.
    """
    self.log.error( '_submitPilot method not implemented' )
    return S_OK()

  def _listMatch( self, proxy, jdl, taskQueueID, rb ):
    """ This method must be implemented on the Backend specific derived class,
        the base implementation only logs an error and returns S_OK.
    """
    self.log.error( '_listMatch method not implemented' )
    return S_OK()

  def _getChildrenReferences( self, proxy, parentReference, taskQueueID ):
    """ This method must be implemented on the Backend specific derived class,
        the base implementation only logs an error and returns S_OK.
    """
    self.log.error( '_getChildrenReferences method not implemented' )
    return S_OK()

  def submitPilots( self, taskQueueDict, pilotsToSubmit, workDir = None ):
    """
      Submit pilots for the given TaskQueue.

      The candidate CEs, the pilot options and a pilot proxy are resolved here,
      the submission itself is done by the backend specific _submitPilots method.
      Any exception raised on the way is logged and S_OK( 0 ) is returned.

      :param taskQueueDict: TaskQueue description
      :param pilotsToSubmit: number of pilots requested
      :param workDir: working directory handed over to _submitPilots
      :return: the result of _submitPilots, S_ERROR if no CE, pilot options or proxy
               are available, S_OK( 0 ) after an exception
    """
    try:

      taskQueueID = taskQueueDict['TaskQueueID']

      self.log.verbose( 'Submitting Pilot' )
      ceMask = self._resolveCECandidates( taskQueueDict )
      if not ceMask:
        return S_ERROR( 'No CE available for TaskQueue %d' % int( taskQueueID ) )
      result = self._getPilotOptions( taskQueueDict, pilotsToSubmit )
      if not result['OK']:
        return result
      ( pilotOptions, pilotsPerJob, ownerDN, ownerGroup, submitPrivatePilot, privateTQ ) = result['Value']
      # get a valid proxy, submit with a long proxy to avoid renewal
      ret = self._getPilotProxyFromDIRACGroup( ownerDN, ownerGroup, requiredTimeLeft = 86400 * 5 )
      if not ret['OK']:
        self.log.error( ret['Message'] )
        self.log.error( 'No proxy Available', 'User "%s", Group "%s"' % ( ownerDN, ownerGroup ) )
        return S_ERROR( ERROR_PROXY )
      proxy = ret['Value']
      # Now call a Grid Specific method to handle the final submission of the pilots
      return self._submitPilots( workDir, taskQueueDict, pilotOptions,
                                 pilotsToSubmit, ceMask,
                                 submitPrivatePilot, privateTQ,
                                 proxy, pilotsPerJob )

    except Exception:
      self.log.exception( 'Error in Pilot Submission' )

    # Only reached after an exception
    return S_OK( 0 )

  def _getPilotProxyFromDIRACGroup( self, ownerDN, ownerGroup, requiredTimeLeft ):
    """
     To be overwritten if a given Pilot does not require a full proxy

     :param ownerDN: DN of the owner of the proxy
     :param ownerGroup: DIRAC group of the proxy
     :param requiredTimeLeft: forwarded to the proxy manager
                              ( submitPilots passes 86400 * 5, presumably seconds )
     :return: the result of the proxy manager call
    """
    self.log.info( "Downloading %s@%s proxy" % ( ownerDN, ownerGroup ) )
    return gProxyManager.getPilotProxyFromDIRACGroup( ownerDN, ownerGroup, requiredTimeLeft )

  def exceptionCallBack( self, threadedJob, exceptionInfo ):
    """
     Log the current exception, both arguments are ignored
    """
    self.log.exception( 'Error in Pilot Submission' )
示例#5
0
class SiteInspectorAgent( AgentModule ):
  """ SiteInspectorAgent

  Agent that collects all the site names together with their current status
  and lets the PEP evaluate (enforce the policies on) each of them.

  """

  # Max number of worker threads by default
  __maxNumberOfThreads = 15

  # Inspection freqs, defaults, the lower, the higher priority to be checked.
  # Error state usually means there is a glitch somewhere, so it has the highest
  # priority.
  __checkingFreqs = {'Active'   : 20,
                     'Degraded' : 20,
                     'Probing'  : 20,
                     'Banned'   : 15,
                     'Unknown'  : 10,
                     'Error'    : 5}


  def __init__( self, *args, **kwargs ):
    """ Declare the members; they are filled in by initialize() / execute().
    """

    AgentModule.__init__( self, *args, **kwargs )

    # Queue of site descriptions, rebuilt on every cycle by execute()
    self.sitesToBeChecked    = None
    self.threadPool          = None
    self.siteClient          = None
    # Clients handed over to the PEP
    self.clients             = {}


  def initialize( self ):
    """ Standard initialize: creates the thread pool and the clients.

    :return: S_OK()
    """

    maxNumberOfThreads = self.am_getOption( 'maxNumberOfThreads', self.__maxNumberOfThreads )
    self.threadPool    = ThreadPool( maxNumberOfThreads, maxNumberOfThreads )

    self.siteClient  = SiteStatus()

    self.clients['SiteStatus']               = self.siteClient
    self.clients['ResourceManagementClient'] = ResourceManagementClient()

    return S_OK()

  def execute( self ):
    """ execute

    Main method of the agent. It gets the queue of sites to be checked,
    calculates how many worker jobs are needed and queues them in the thread
    pool. Each worker takes sites from the queue until it is empty. The method
    then joins the queue, so that the cycle does not end before all sites have
    been processed.

    :return: S_OK() once all sites are processed, S_ERROR if the sites could
             not be obtained or if no worker job could be queued
    """

    # Gets sites to be checked ( returns a Queue )
    sitesToBeChecked = self.getSitesToBeChecked()
    if not sitesToBeChecked['OK']:
      self.log.error( sitesToBeChecked['Message'] )
      return sitesToBeChecked
    self.sitesToBeChecked = sitesToBeChecked['Value']

    queueSize = self.sitesToBeChecked.qsize()
    pollingTime = self.am_getPollingTime()

    # Assigns number of threads on the fly such that we exhaust the PollingTime
    # without having to spawn too many threads. We assume 10 seconds per element
    # to be processed ( actually, it takes something like 1 sec per element ):
    # numberOfThreads = elements * 10(s/element) / pollingTime
    numberOfThreads = int( math.ceil( queueSize * 10. / pollingTime ) )

    self.log.info( 'Needed %d threads to process %d elements' % ( numberOfThreads, queueSize ) )

    queuedJobs = 0
    for _x in xrange( numberOfThreads ):
      jobUp = self.threadPool.generateJobAndQueueIt( self._execute )
      if jobUp['OK']:
        queuedJobs += 1
      else:
        self.log.error( jobUp['Message'] )

    if numberOfThreads and not queuedJobs:
      # Without any worker nobody calls task_done(), so join() below would
      # block forever: give up on this cycle instead.
      return S_ERROR( 'Could not queue any job to process the sites' )

    self.log.info( 'blocking until all sites have been processed' )
    # block until all tasks are done
    self.sitesToBeChecked.join()
    self.log.info( 'done')

    return S_OK()


  def getSitesToBeChecked( self ):
    """ getSitesToBeChecked

    Gets all the site names from the SiteStatus client, then their current
    statuses, and puts one description dictionary per site on a queue.

    :return: S_OK( Queue ) or the failed result of the SiteStatus client
    """

    toBeChecked = Queue.Queue()

    res = self.siteClient.getSites('All')
    if not res['OK']:
      return res

    # get the current status
    res = self.siteClient.getSiteStatuses( res['Value'] )
    if not res['OK']:
      return res

    # one queue element per site
    for site in res['Value']:
      status = res['Value'].get(site, 'Unknown')

      toBeChecked.put( { 'status': status,
                         'name': site,
                         'site' : site,
                         'element' : 'Site',
                         'statusType': 'all',
                         'elementType': 'Site' } )

    return S_OK( toBeChecked )


  # Private methods ............................................................

  def _execute( self ):
    """
      Method run by each of the jobs queued in the ThreadPool.
      It loops until there are no sites left on the queue.

      On each iteration, it enforces the policies for one site. Whatever the
      outcome (success, S_ERROR or exception) the site is marked as done, since
      execute() joins the queue and would otherwise never return.

      :return: S_OK() when the queue is empty
    """

    pep = PEP( clients = self.clients )

    while True:

      try:
        site = self.sitesToBeChecked.get_nowait()
      except Queue.Empty:
        return S_OK()

      try:
        resEnforce = pep.enforce( site )
        if not resEnforce['OK']:
          self.log.error( 'Failed policy enforcement', resEnforce['Message'] )
      except Exception:
        # keep this worker alive so that the remaining sites get processed
        self.log.exception( 'Exception during policy enforcement for site %s' % site['name'] )
      finally:
        # Used together with join !
        self.sitesToBeChecked.task_done()
示例#6
0
class DiracAdmin(API):
  """ Administrative functionalities
  """

  #############################################################################
  def __init__(self):
    """Set up the DIRAC Admin API: CS API handle, debug flag, scratch and
       current directories, RSS flag and site status helper.
    """
    super(DiracAdmin, self).__init__()

    self.csAPI = CSAPI()

    # debug is on when the configured log level is DEBUG (which is the default)
    logLevel = gConfig.getValue(self.section + '/LogLevel', 'DEBUG')
    self.dbg = (logLevel == 'DEBUG')

    self.scratchDir = gConfig.getValue(self.section + '/ScratchDir', '/tmp')
    self.currentDir = os.getcwd()

    resourceStatus = ResourceStatus()
    self.rssFlag = resourceStatus.rssFlag
    self.sitestatus = SiteStatus()

  #############################################################################
  def uploadProxy(self, group):
    """Upload a proxy for the given DIRAC group through the proxy manager.

       Example usage:

         >>> print diracAdmin.uploadProxy('lhcb_pilot')
         {'OK': True, 'Value': 0L}

       :param group: DIRAC Group
       :type group: string
       :return: S_OK,S_ERROR
    """
    uploadResult = gProxyManager.uploadProxy(diracGroup=group)
    return uploadResult

  #############################################################################
  def setProxyPersistency(self, userDN, userGroup, persistent=True):
    """Set the persistency flag of a proxy in the Proxy Manager.

       Example usage:

         >>> print diracAdmin.setProxyPersistency( 'some DN', 'dirac group', True )
         {'OK': True }

       :param userDN: User DN
       :type userDN: string
       :param userGroup: DIRAC Group
       :type userGroup: string
       :param persistent: Persistent flag
       :type persistent: boolean
       :return: S_OK,S_ERROR
    """
    persistencyResult = gProxyManager.setPersistency(userDN, userGroup, persistent)
    return persistencyResult

  #############################################################################
  def checkProxyUploaded(self, userDN, userGroup, requiredTime):
    """Ask the Proxy Manager whether the user has a proxy uploaded for the
       given group with at least the required life time.

       Example usage:

         >>> print diracAdmin.checkProxyUploaded( 'some DN', 'dirac group', 0 )
         {'OK': True, 'Value' : True/False }

       :param userDN: User DN
       :type userDN: string
       :param userGroup: DIRAC Group
       :type userGroup: string
       :param requiredTime: Required life time of the uploaded proxy
       :return: S_OK,S_ERROR
    """
    hasProxy = gProxyManager.userHasProxy(userDN, userGroup, requiredTime)
    return hasProxy

  #############################################################################
  def getSiteMask(self, printOutput=False, status='Active'):
    """Retrieve the list of sites being in the given status.

       Example usage:

         >>> print diracAdmin.getSiteMask()
         {'OK': True, 'Value': [...]}

       :param printOutput: if True, sort the site list (in place) and print it
       :param status: site status to select, 'Active' by default
       :return: S_OK,S_ERROR as returned by the site status helper

    """
    result = self.sitestatus.getSites(siteState=status)
    if not result['OK']:
      return result

    siteNames = result['Value']
    if printOutput:
      # in-place sort: the list inside the returned result gets sorted as well
      siteNames.sort()
      for siteName in siteNames:
        print(siteName)

    return result

  #############################################################################
  def getBannedSites(self, printOutput=False):
    """Retrieve the sorted list of sites that are either Banned or Probing.

       Example usage:

         >>> print diracAdmin.getBannedSites()
         {'OK': True, 'Value': []}

       :param printOutput: if True, print one site per line
       :return: S_OK( list of sites ), or the failed result of the lookup

    """
    siteLists = []
    for state in ('Banned', 'Probing'):
      lookup = self.sitestatus.getSites(siteState=state)
      if not lookup['OK']:
        return lookup
      siteLists.append(lookup['Value'])

    mergedList = sorted(siteLists[0] + siteLists[1])

    if printOutput:
      print('\n'.join(mergedList))

    return S_OK(mergedList)

  #############################################################################
  def getSiteSection(self, site, printOutput=False):
    """Get the options of the configuration section of a DIRAC site.

       Example usage:

         >>> print diracAdmin.getSiteSection('LCG.CERN.ch')
         {'OK': True, 'Value':}

       :param site: DIRAC site name, its first dot-separated token is the grid type
       :param printOutput: if True, pretty print the options on success
       :return: S_OK,S_ERROR
    """
    gridType = site.split('.')[0]

    gridSections = gConfig.getSections('/Resources/Sites/%s' % (gridType))
    if not gridSections['OK']:
      return S_ERROR('/Resources/Sites/%s is not a valid site section' % (gridType))

    siteOptions = gConfig.getOptionsDict('/Resources/Sites/%s/%s' % (gridType, site))
    if printOutput and siteOptions['OK']:
      print(self.pPrint.pformat(siteOptions['Value']))
    return siteOptions

  #############################################################################
  def allowSite(self, site, comment, printOutput=False):
    """Set the site status to Active (i.e. add the site to the site mask).

       Example usage:

         >>> print diracAdmin.allowSite('LCG.CERN.ch', 'some comment')
         {'OK': True, 'Value': }

       :param site: DIRAC site name
       :param comment: comment stored together with the status change
       :param printOutput: if True, print what has been done
       :return: S_OK,S_ERROR

    """
    validity = self.__checkSiteIsValid(site)
    if not validity['OK']:
      return validity

    activeSites = self.getSiteMask(status='Active')
    if not activeSites['OK']:
      return activeSites

    if site in activeSites['Value']:
      alreadyActive = 'Site %s is already Active' % site
      if printOutput:
        print(alreadyActive)
      return S_OK(alreadyActive)

    # the status is kept by RSS when it is enabled, by the WMS otherwise
    if not self.rssFlag:
      result = RPCClient('WorkloadManagement/WMSAdministrator').allowSite(site, comment)
    else:
      result = self.sitestatus.setSiteStatus(site, 'Active', comment)

    if result['OK'] and printOutput:
      print('Site %s status is set to Active' % site)

    return result

  #############################################################################
  def getSiteMaskLogging(self, site=None, printOutput=False):
    """Retrieve the site mask logging information.

       Example usage:

         >>> print diracAdmin.getSiteMaskLogging('LCG.AUVER.fr')
         {'OK': True, 'Value': }

       :param site: DIRAC site name; when not given, no site selection is
                    applied (the site name is only validated when given)
       :param printOutput: if True, print the logging information
       :return: S_OK,S_ERROR - the result of the service is always returned
                as is, printing never alters it
    """
    # Only a concrete site name can be validated: validating the default None
    # used to fail always, making the "all sites" case unreachable
    if site:
      result = self.__checkSiteIsValid(site)
      if not result['OK']:
        return result

    if self.rssFlag:
      result = ResourceStatusClient().selectStatusElement('Site', 'History', name=site)
    else:
      result = RPCClient('WorkloadManagement/WMSAdministrator').getSiteMaskLogging(site)

    if not result['OK']:
      return result

    if printOutput:
      if site:
        print('\nSite Mask Logging Info for %s\n' % site)
      else:
        print('\nAll Site Mask Logging Info\n')

      sitesLogging = result['Value']
      if isinstance(sitesLogging, dict):
        for siteName, tupleList in sitesLogging.iteritems():
          # a per-site header is only needed when several sites are listed
          if not site:
            print('\n===> %s\n' % siteName)
          for tup in tupleList:
            line = str(tup[0]).ljust(8) + str(tup[1]).ljust(20) + \
                '( ' + str(tup[2]) + ' )  "' + str(tup[3]) + '"'
            print(line)
          print(' ')
      elif isinstance(sitesLogging, list):
        # NOTE(review): list of records, presumably from the RSS history table;
        # columns 1, 3 and 4 are the ones the previous code selected - confirm
        # their meaning against the ResourceStatusClient
        for record in sitesLogging:
          line = '  '.join([str(field) for field in (record[1], record[3], record[4])])
          print(line)

    return result

  #############################################################################
  def banSite(self, site, comment, printOutput=False):
    """Set the site status to Banned (i.e. remove the site from the site mask).

       Example usage:

         >>> print diracAdmin.banSite('LCG.CERN.ch', 'some comment')
         {'OK': True, 'Value': }

       :param site: DIRAC site name
       :param comment: comment stored together with the status change
       :param printOutput: if True, print what has been done
       :return: S_OK,S_ERROR

    """
    validity = self.__checkSiteIsValid(site)
    if not validity['OK']:
      return validity

    bannedSites = self.getSiteMask(status='Banned')
    if not bannedSites['OK']:
      return bannedSites

    if site in bannedSites['Value']:
      alreadyBanned = 'Site %s is already Banned' % site
      if printOutput:
        print(alreadyBanned)
      return S_OK(alreadyBanned)

    # the status is kept by RSS when it is enabled, by the WMS otherwise
    if not self.rssFlag:
      result = RPCClient('WorkloadManagement/WMSAdministrator').banSite(site, comment)
    else:
      result = self.sitestatus.setSiteStatus(site, 'Banned', comment)

    if result['OK'] and printOutput:
      print('Site %s status is set to Banned' % site)

    return result

  #############################################################################
  def __checkSiteIsValid(self, site):
    """Internal function checking that a site name is among the keys of the
       site to CE mapping.

       :param site: DIRAC site name
       :return: S_OK( message ) if the site is known, S_ERROR otherwise
    """
    mapping = getSiteCEMapping()
    if not mapping['OK']:
      return S_ERROR('Could not get site CE mapping')

    knownSites = mapping['Value'].keys()
    if site in knownSites:
      return S_OK('%s is valid' % site)

    return S_ERROR('Specified site %s is not in list of defined sites' % site)

  #############################################################################
  def clearMask(self):
    """Ask the WMS Administrator service to clear the site mask.
       Should be used with care.

       Example usage:

         >>> print diracAdmin.clearMask()
         {'OK': True, 'Value':''}

       :return: S_OK,S_ERROR

    """
    return RPCClient('WorkloadManagement/WMSAdministrator').clearMask()

  #############################################################################
  def getServicePorts(self, setup='', printOutput=False):
    """Collect the ports of the services configured for the given setup.
       If no setup is given, it is taken from /DIRAC/Setup.

       Example usage:

         >>> print diracAdmin.getServicePorts()
         {'OK': True, 'Value':''}

       :param setup: DIRAC setup name
       :param printOutput: if True, pretty print the resulting dictionary
       :return: S_OK( { 'System/Service' : port } ), S_ERROR if the setup or
                the systems cannot be resolved in the configuration

    """
    if not setup:
      setup = gConfig.getValue('/DIRAC/Setup', '')

    knownSetups = gConfig.getSections('/DIRAC/Setups', [])
    if not knownSetups['OK']:
      return S_ERROR('Could not get /DIRAC/Setups sections')
    knownSetups = knownSetups['Value']
    if setup not in knownSetups:
      return S_ERROR('Setup %s is not in allowed list: %s' % (setup, ', '.join(knownSetups)))

    setupOptions = gConfig.getOptionsDict('/DIRAC/Setups/%s' % setup)
    if not setupOptions['OK']:
      return S_ERROR('Could not get /DIRAC/Setups/%s options' % setup)
    # system name -> instance used by this setup
    instances = setupOptions['Value']

    systems = gConfig.getSections('/Systems')
    if not systems['OK']:
      return S_ERROR('Could not get Systems sections')

    ports = {}
    for system in systems['Value']:
      if system not in instances:
        self.log.warn('%s is not defined in /DIRAC/Setups/%s' % (system, setup))
        continue

      path = '/Systems/%s/%s/Services' % (system, instances[system])
      sections = gConfig.getSections(path)
      if not sections['OK']:
        self.log.warn('Could not get sections in %s' % path)
        continue

      services = sections['Value'] or []
      self.log.verbose('System: %s ServicesList: %s' % (system, ', '.join(services)))
      for service in services:
        spath = '%s/%s/Port' % (path, service)
        port = gConfig.getValue(spath, 0)
        if not port:
          self.log.warn('No port found for %s' % spath)
          continue
        self.log.verbose('Found port for %s/%s = %s' % (system, service, port))
        ports['%s/%s' % (system, service)] = port

    if printOutput:
      print(self.pPrint.pformat(ports))

    return S_OK(ports)

  #############################################################################
  def getProxy(self, userDN, userGroup, validity=43200, limited=False):
    """Download a proxy of the given user and group from the proxy manager.

       Example usage:

         >>> print diracAdmin.getProxy('some DN', 'dirac group')
         {'OK': True, 'Value': }

       :param userDN: User DN
       :param userGroup: DIRAC Group
       :param validity: required time left of the proxy, 43200 by default
       :param limited: passed to the proxy manager as its ``limited`` flag
       :return: S_OK,S_ERROR

    """
    download = gProxyManager.downloadProxy(userDN, userGroup,
                                           limited=limited, requiredTimeLeft=validity)
    return download

  #############################################################################
  def getVOMSProxy(self, userDN, userGroup, vomsAttr=False, validity=43200, limited=False):
    """Download a proxy with VOMS extensions of the given user and group from
       the proxy manager.

       Example usage:

         >>> print diracAdmin.getVOMSProxy('some DN', 'dirac group')
         {'OK': True, 'Value': }

       :param userDN: User DN
       :param userGroup: DIRAC Group
       :param vomsAttr: required VOMS attribute
       :param validity: required time left of the proxy, 43200 by default
       :param limited: passed to the proxy manager as its ``limited`` flag
       :return: S_OK,S_ERROR

    """
    download = gProxyManager.downloadVOMSProxy(userDN, userGroup,
                                               limited=limited,
                                               requiredVOMSAttribute=vomsAttr,
                                               requiredTimeLeft=validity)
    return download

  #############################################################################
  def getPilotProxy(self, userDN, userGroup, validity=43200):
    """Get a pilot proxy of the given user and group from the proxy manager.

       Example usage:

         >>> print diracAdmin.getPilotProxy('some DN', 'dirac group')
         {'OK': True, 'Value': }

       :param userDN: User DN
       :param userGroup: DIRAC Group
       :param validity: required time left of the proxy, 43200 by default
       :return: S_OK,S_ERROR

    """
    pilotProxy = gProxyManager.getPilotProxyFromDIRACGroup(userDN, userGroup,
                                                           requiredTimeLeft=validity)
    return pilotProxy

  #############################################################################
  def resetJob(self, jobID):
    """Reset a job or list of jobs through the JobManager service.

       Example::

         >>> print dirac.reset(12345)
         {'OK': True, 'Value': [12345]}

       :param jobID: JobID; a string or a list is first converted to integer(s),
                     any other type is passed on unchanged
       :type jobID: integer, string or list
       :return: S_OK,S_ERROR

    """
    if isinstance(jobID, basestring):
      try:
        jobID = int(jobID)
      except Exception as x:
        return self._errorReport(str(x), 'Expected integer or convertible integer for existing jobID')
    elif isinstance(jobID, list):
      try:
        jobID = list(map(int, jobID))
      except Exception as x:
        return self._errorReport(str(x), 'Expected integer or convertible integer for existing jobIDs')

    return RPCClient('WorkloadManagement/JobManager', useCertificates=False).resetJob(jobID)

  #############################################################################
  def getJobPilotOutput(self, jobID, directory=''):
    """Retrieve the pilot output for an existing job in the WMS and write it
       to ``<directory>/pilot_<jobID>/std.out`` and ``std.err``.

         >>> print dirac.getJobPilotOutput(12345)
         {'OK': True, StdOut:'',StdError:''}

       :param jobID: JobID
       :type jobID: integer or string
       :param directory: existing directory in which the output directory is
                         created; defaults to the current directory recorded at
                         initialization
       :return: S_OK,S_ERROR - an error is returned if the output directory
                already exists
    """
    if not directory:
      directory = self.currentDir

    if not os.path.exists(directory):
      return self._errorReport('Directory %s does not exist' % directory)

    wmsAdmin = RPCClient('WorkloadManagement/WMSAdministrator')
    result = wmsAdmin.getJobPilotOutput(jobID)
    if not result['OK']:
      return result

    outputPath = '%s/pilot_%s' % (directory, jobID)
    if os.path.exists(outputPath):
      self.log.info('Remove %s and retry to continue' % outputPath)
      return S_ERROR('Remove %s and retry to continue' % outputPath)

    self.log.verbose('Creating directory %s' % outputPath)
    os.mkdir(outputPath)

    outputs = result['Value']
    if 'StdOut' in outputs:
      stdout = '%s/std.out' % (outputPath)
      with open(stdout, 'w') as fopen:
        fopen.write(outputs['StdOut'])
      self.log.verbose('Standard output written to %s' % (stdout))
    else:
      self.log.warn('No standard output returned')

    # NOTE(review): this method historically looked for 'StdError' whereas
    # getPilotOutput reads 'StdErr'; both keys are accepted here - confirm
    # which one the WMSAdministrator service really returns
    errorKey = 'StdError' if 'StdError' in outputs else 'StdErr'
    if errorKey in outputs:
      stderr = '%s/std.err' % (outputPath)
      with open(stderr, 'w') as fopen:
        fopen.write(outputs[errorKey])
      self.log.verbose('Standard error written to %s' % (stderr))
    else:
      self.log.warn('No standard error returned')

    self.log.always('Outputs retrieved in %s' % outputPath)
    return result

  #############################################################################
  def getPilotOutput(self, gridReference, directory=''):
    """Retrieve the output (std.out and std.err) of a pilot and write it to
       ``<directory>/pilot_<last token of the reference>``.

         >>> print dirac.getPilotOutput('some pilot reference')
         {'OK': True, 'Value': {}}

       :param gridReference: pilot reference
       :type gridReference: string
       :param directory: existing directory in which the output directory is
                         created; defaults to the current directory recorded at
                         initialization
       :return: S_OK,S_ERROR - an error is returned if the output directory
                already exists
    """
    if not isinstance(gridReference, basestring):
      return self._errorReport('Expected string for pilot reference')

    if not directory:
      directory = self.currentDir

    if not os.path.exists(directory):
      return self._errorReport('Directory %s does not exist' % directory)

    result = RPCClient('WorkloadManagement/WMSAdministrator').getPilotOutput(gridReference)
    if not result['OK']:
      return result

    # name the output directory after the last token of the reference
    shortReference = gridReference.split('/')[-1] or 'reference'
    outputPath = '%s/pilot_%s' % (directory, shortReference)

    if os.path.exists(outputPath):
      self.log.info('Remove %s and retry to continue' % outputPath)
      return S_ERROR('Remove %s and retry to continue' % outputPath)

    if not os.path.exists(outputPath):
      self.log.verbose('Creating directory %s' % outputPath)
      os.mkdir(outputPath)

    outputs = result['Value']
    # ( key in the outputs, target file, success message, warning if missing )
    streams = (('StdOut', '%s/std.out', 'Standard output written to %s', 'No standard output returned'),
               ('StdErr', '%s/std.err', 'Standard error written to %s', 'No standard error returned'))
    for key, pathTemplate, writtenMessage, missingMessage in streams:
      if key not in outputs:
        self.log.warn(missingMessage)
        continue
      target = pathTemplate % (outputPath)
      with open(target, 'w') as fopen:
        fopen.write(outputs[key])
      self.log.info(writtenMessage % (target))

    self.log.always('Outputs retrieved in %s' % outputPath)
    return result

  #############################################################################
  def getPilotInfo(self, gridReference):
    """Retrieve the information relative to a pilot reference from the
       WMS Administrator service.

         >>> print dirac.getPilotInfo('some pilot reference')
         {'OK': True, 'Value': {}}

       :param gridReference: Pilot Job Reference
       :type gridReference: string
       :return: S_OK,S_ERROR
    """
    if isinstance(gridReference, basestring):
      return RPCClient('WorkloadManagement/WMSAdministrator').getPilotInfo(gridReference)

    return self._errorReport('Expected string for pilot reference')

  #############################################################################
  def killPilot(self, gridReference):
    """Ask the WMS Administrator service to kill the specified pilot.

         >>> print dirac.killPilot('some pilot reference')
         {'OK': True, 'Value': {}}

       :param gridReference: Pilot Job Reference
       :type gridReference: string
       :return: S_OK,S_ERROR
    """
    if isinstance(gridReference, basestring):
      return RPCClient('WorkloadManagement/WMSAdministrator').killPilot(gridReference)

    return self._errorReport('Expected string for pilot reference')

  #############################################################################
  def getPilotLoggingInfo(self, gridReference):
    """Retrieve the logging info of a pilot from the WMS Administrator service.

         >>> print dirac.getPilotLoggingInfo('some pilot reference')
         {'OK': True, 'Value': {"The output of the command"}}

       :param gridReference: Grid pilot job reference Id
       :type gridReference: string
       :return: S_OK,S_ERROR
    """
    if isinstance(gridReference, basestring):
      return RPCClient('WorkloadManagement/WMSAdministrator').getPilotLoggingInfo(gridReference)

    return self._errorReport('Expected string for pilot reference')

  #############################################################################
  def getJobPilots(self, jobID):
    """Get the pilots of a given jobID from the WMS Administrator service.
       On success the returned value is pretty printed to the screen.

         >>> print dirac.getJobPilots(12345)
         {'OK': True, 'Value': {PilotID:{StatusDict}}}

       :param jobID: JobID, a string is first converted to an integer
       :type jobID: integer or string
       :return: S_OK,S_ERROR

    """
    if isinstance(jobID, basestring):
      try:
        jobID = int(jobID)
      except Exception as x:
        return self._errorReport(str(x), 'Expected integer or string for existing jobID')

    pilots = RPCClient('WorkloadManagement/WMSAdministrator').getPilots(jobID)
    if pilots['OK']:
      print(self.pPrint.pformat(pilots['Value']))
    return pilots

  #############################################################################
  def getPilotSummary(self, startDate='', endDate=''):
    """Retrieve the pilot summary from the WMS Administrator service and print
       it as a table with one line per CE; the full dictionary is returned.

         >>> print dirac.getPilotSummary()
         {'OK': True, 'Value': {CE:{Status:Count}}}

       :param startDate: passed unchanged to the service
       :param endDate: passed unchanged to the service
       :return: S_OK,S_ERROR
    """
    result = RPCClient('WorkloadManagement/WMSAdministrator').getPilotSummary(startDate, endDate)
    if not result['OK']:
      return result

    ceDict = result['Value']

    # the header needs as many Status/Count column pairs as the widest line
    maxStates = max([0] + [len(summary.keys()) for _ce, summary in ceDict.iteritems()])
    header = 'CE'.ljust(28) + ('Status'.ljust(12) + 'Count'.ljust(12)) * maxStates
    print(header)

    for ce, summary in ceDict.iteritems():
      cells = [state.ljust(12) + str(summary[state]).ljust(12) for state in sorted(summary)]
      line = ce.ljust(28) + ''.join(cells)
      print(line)

    return result

  #############################################################################
  def selectRequests(self, jobID=None, requestID=None, requestName=None,
                     requestType=None, status=None, operation=None, ownerDN=None,
                     ownerGroup=None, requestStart=0, limit=100, printOutput=False):
    """Select requests from the request management system. A few notes on the selection criteria:

         - jobID is the WMS JobID for the request (if applicable)
         - requestID is assigned during submission of the request
         - requestName is the corresponding XML file name
         - requestType e.g. 'transfer'
         - status e.g. Done
         - operation e.g. replicateAndRegister
         - requestStart e.g. the first request to consider (start from 0 by default)
         - limit e.g. selection limit (default 100)

       Only the criteria with a true value are sent, as strings, to the service.

       >>> dirac.selectRequests(jobID='4894')
       {'OK': True, 'Value': [[<Requests>]]}

       :param printOutput: if True, print the summary, the parameter names and
                           at most ``limit`` records
       :return: the result of the service, or S_ERROR if the arguments are not
                valid or if the service returned an empty value
    """
    options = {'RequestID': requestID, 'RequestName': requestName, 'JobID': jobID, 'OwnerDN': ownerDN,
               'OwnerGroup': ownerGroup, 'RequestType': requestType, 'Status': status, 'Operation': operation}

    conditions = {}
    for key, value in options.iteritems():
      if value:
        try:
          conditions[key] = str(value)
        except Exception as x:
          return self._errorReport(str(x), 'Expected string for %s field' % key)

    try:
      requestStart = int(requestStart)
      limit = int(limit)
    except Exception as x:
      # name the fields: the message used to interpolate the value of limit
      return self._errorReport(str(x), 'Expected integer for requestStart and limit fields')

    self.log.verbose('Will select requests with the following conditions')
    self.log.verbose(self.pPrint.pformat(conditions))
    requestClient = RPCClient("RequestManagement/centralURL")
    result = requestClient.getRequestSummaryWeb(conditions, [], requestStart, limit)
    if not result['OK']:
      self.log.warn(result['Message'])
      return result

    requestIDs = result['Value']
    # check for an empty answer before looking into it: this test used to come
    # last, after requestIDs['Records'] had already been accessed
    if not requestIDs:
      return S_ERROR('No requests selected for conditions: %s' % conditions)

    records = requestIDs['Records']
    conds = ['%s = %s' % (key, value) for key, value in conditions.iteritems() if value]
    summary = '%s request(s) selected with conditions %s and limit %s' % (len(records),
                                                                          ', '.join(conds), limit)
    self.log.verbose(summary)

    if printOutput:
      print(summary)
      print(requestIDs['ParameterNames'])
      for request in records[:limit]:
        print(request)

    return result

  #############################################################################
  def getRequestSummary(self, printOutput=False):
    """
    Get the DB summary from the request management service.

    :param printOutput: if True, pretty print the summary on success
    :return: S_OK,S_ERROR as returned by the service
    """
    result = RPCClient("RequestManagement/centralURL", timeout=120).getDBSummary()

    if result['OK']:
      if printOutput:
        print(self.pPrint.pformat(result['Value']))
    else:
      self.log.warn(result['Message'])

    return result

  #############################################################################
  def getExternalPackageVersions(self):
    """
    Log the DIRAC version, the location and version of lcg_util and of
    gfalthr (falling back to gfal when gfalthr cannot be used), and the default
    list of storage protocols. Import failures are logged, never raised.

    :return: S_OK()
    """
    gLogger.info('DIRAC version v%dr%d build %d' % (DIRAC.majorVersion, DIRAC.minorVersion, DIRAC.patchLevel))

    try:
      import lcg_util  # pylint: disable=import-error
      gLogger.info('Using lcg_util from: \n%s' % lcg_util.__file__)
      gLogger.info("The version of lcg_utils is %s" % lcg_util.lcg_util_version())
    except Exception as x:
      gLogger.exception("SRM2Storage.__init__: Failed to import lcg_util: %s" % (x))

    gfalthrAvailable = True
    try:
      import gfalthr as gfal  # pylint: disable=import-error
      gLogger.info("Using gfalthr from: \n%s" % gfal.__file__)
      gLogger.info("The version of gfalthr is %s" % gfal.gfal_version())
    except Exception as x:
      gLogger.warn("SRM2Storage.__init__: Failed to import gfalthr: %s." % (x))
      gfalthrAvailable = False

    if not gfalthrAvailable:
      # second chance with the plain gfal module
      try:
        import gfal  # pylint: disable=import-error
        gLogger.info("Using gfal from: %s" % gfal.__file__)
        gLogger.info("The version of gfal is %s" % gfal.gfal_version())
      except Exception as x:
        gLogger.exception("SRM2Storage.__init__: Failed to import gfal: %s" % (x))

    protocols = gConfig.getValue('/Resources/StorageElements/DefaultProtocols', [])
    gLogger.info('Default list of protocols are: %s' % (', '.join(protocols)))
    return S_OK()

  #############################################################################
  def getSiteProtocols(self, site, printOutput=False):
    """
    Allows to check the defined protocols for each site SE.

    For every SE of the site, the ProtocolsList of its SRM2 section is looked
    up; the default protocols are used when that list is empty.

    :param site: DIRAC site name
    :param printOutput: if True, print a summary table
    :return: S_OK( { SE : list of protocols } ), S_ERROR if the site is not
             valid, has no SE, or the sections of an SE cannot be read
    """
    result = self.__checkSiteIsValid(site)
    if not result['OK']:
      return result

    siteSection = '/Resources/Sites/%s/%s/SE' % (site.split('.')[0], site)
    siteSEs = gConfig.getValue(siteSection, [])
    if not siteSEs:
      return S_ERROR('No SEs found for site %s in section %s' % (site, siteSection))

    defaultProtocols = gConfig.getValue('/Resources/StorageElements/DefaultProtocols', [])
    # message and value are two arguments: without the comma the two string
    # literals were concatenated and the message became the join separator
    self.log.verbose('Default list of protocols are', ', '.join(defaultProtocols))
    seInfo = {}
    siteSEs.sort()
    for se in siteSEs:
      sections = gConfig.getSections('/Resources/StorageElements/%s/' % (se))
      if not sections['OK']:
        return sections
      for section in sections['Value']:
        if gConfig.getValue('/Resources/StorageElements/%s/%s/ProtocolName' % (se, section), '') == 'SRM2':
          path = '/Resources/StorageElements/%s/%s/ProtocolsList' % (se, section)
          seProtocols = gConfig.getValue(path, [])
          if not seProtocols:
            seProtocols = defaultProtocols
          seInfo[se] = seProtocols

    if printOutput:
      print('\nSummary of protocols for StorageElements at site %s' % site)
      print('\nStorageElement'.ljust(30) + 'ProtocolsList'.ljust(30) + '\n')
      for se, protocols in seInfo.iteritems():
        print(se.ljust(30) + ', '.join(protocols).ljust(30))

    return S_OK(seInfo)

  #############################################################################
  def setSiteProtocols(self, site, protocolsList, printOutput=False):
    """
    Write a protocol list into the CS for the SRM2 sections of every SE of a site.

    Each requested protocol must appear in ``/Resources/StorageElements/DefaultProtocols``.
    The user is asked for confirmation before anything is changed, and the CS is
    committed only if at least one ``ProtocolsList`` option was set.

    :param site: site name; the part before the first '.' selects the CS sub-section
    :type site: string
    :param protocolsList: protocols to store (joined with ', ')
    :type protocolsList: list of strings
    :param printOutput: print the outcome of the commit step when True
    :type printOutput: boolean
    :return: S_OK() on success or when the user declines, otherwise the failing result
             or an S_ERROR describing the problem
    """
    siteCheck = self.__checkSiteIsValid(site)
    if not siteCheck['OK']:
      return siteCheck

    gridPrefix = site.split('.')[0]
    siteSection = '/Resources/Sites/%s/%s/SE' % (gridPrefix, site)
    siteSEs = gConfig.getValue(siteSection, [])
    if not siteSEs:
      return S_ERROR('No SEs found for site %s in section %s' % (site, siteSection))

    defaultProtocols = gConfig.getValue('/Resources/StorageElements/DefaultProtocols', [])
    knownProtocols = ', '.join(defaultProtocols)
    self.log.verbose('Default list of protocols are', knownProtocols)

    # Refuse the whole request as soon as one protocol is not a known default
    for protocol in protocolsList:
      if protocol in defaultProtocols:
        continue
      return S_ERROR('Requested to set protocol %s in list but %s is not '
                     'in default list of protocols:\n%s' % (protocol, protocol, knownProtocols))

    requested = ', '.join(protocolsList)
    answer = promptUser('Do you want to add the following default protocols:'
                        ' %s for SE(s):\n%s' % (requested, ', '.join(siteSEs)))
    if not answer['OK']:
      return answer
    if answer['Value'].lower() != 'y':
      self.log.always('No protocols will be added')
      return S_OK()

    modifiedCS = False
    for se in siteSEs:
      seSections = gConfig.getSections('/Resources/StorageElements/%s/' % (se))
      if not seSections['OK']:
        return seSections
      for section in seSections['Value']:
        protocolName = gConfig.getValue('/Resources/StorageElements/%s/%s/ProtocolName' % (se, section), '')
        if protocolName != 'SRM2':
          continue
        path = '/Resources/StorageElements/%s/%s/ProtocolsList' % (se, section)
        self.log.verbose('Setting %s to %s' % (path, requested))
        setResult = self.csSetOption(path, requested)
        if not setResult['OK']:
          return setResult
        modifiedCS = True

    if not modifiedCS:
      if printOutput:
        print('No modifications to CS required')
      return S_OK()

    commitResult = self.csCommitChanges(False)
    if not commitResult['OK']:
      return S_ERROR('CS Commit failed with message = %s' % (commitResult['Message']))
    if printOutput:
      print('Successfully committed changes to CS')
    return S_OK()

  #############################################################################
  def csSetOption(self, optionPath, optionValue):
    """
    Set the value of an option in the CS (delegates to CSAPI.setOption).

    :param optionPath: path of the option
    :param optionValue: value to store
    :return: result of CSAPI.setOption, passed through unchanged
    """
    outcome = self.csAPI.setOption(optionPath, optionValue)
    return outcome

  #############################################################################
  def csSetOptionComment(self, optionPath, comment):
    """
    Set the comment of an option in the CS (delegates to CSAPI.setOptionComment).

    :param optionPath: path of the option
    :param comment: comment text to attach
    :return: result of CSAPI.setOptionComment, passed through unchanged
    """
    outcome = self.csAPI.setOptionComment(optionPath, comment)
    return outcome

  #############################################################################
  def csModifyValue(self, optionPath, newValue):
    """
    Change the value of an existing option in the CS (delegates to CSAPI.modifyValue).

    :param optionPath: path of the option
    :param newValue: replacement value
    :return: result of CSAPI.modifyValue, passed through unchanged
    """
    csAPI = self.csAPI
    return csAPI.modifyValue(optionPath, newValue)

  #############################################################################
  def csRegisterUser(self, username, properties):
    """
    Add a user to the CS (delegates to CSAPI.addUser).

    :param username: name of the user to register
    :param properties: dict describing the user:
        - DN
        - groups : list/tuple of groups the user belongs to
        - <others> : further properties of the user, like mail
    :return: result of CSAPI.addUser, passed through unchanged
    """
    outcome = self.csAPI.addUser(username, properties)
    return outcome

  #############################################################################
  def csDeleteUser(self, user):
    """
    Remove a user from the CS (delegates to CSAPI.deleteUsers).

    :param user: user name; a list of users can be given as well
    :return: result of CSAPI.deleteUsers, passed through unchanged
    """
    csAPI = self.csAPI
    return csAPI.deleteUsers(user)

  #############################################################################
  def csModifyUser(self, username, properties, createIfNonExistant=False):
    """
    Apply changes to a user in the CS (delegates to CSAPI.modifyUser).

    :param username: name of the user to modify
    :param properties: same kind of dict as accepted when adding a user
    :param createIfNonExistant: forwarded to CSAPI.modifyUser as third argument
    :return: result of CSAPI.modifyUser, passed through unchanged
    """
    outcome = self.csAPI.modifyUser(username, properties, createIfNonExistant)
    return outcome

  #############################################################################
  def csListUsers(self, group=False):
    """
    List the users known to the CS (delegates to CSAPI.listUsers).

    :param group: restrict the listing to this group; all users when left unset
    :return: result of CSAPI.listUsers, passed through unchanged
    """
    csAPI = self.csAPI
    return csAPI.listUsers(group)

  #############################################################################
  def csDescribeUsers(self, mask=False):
    """
    Describe users and their properties in the CS (delegates to CSAPI.describeUsers).

    :param mask: when given, only the users in the mask are returned
    :return: result of CSAPI.describeUsers, passed through unchanged
    """
    outcome = self.csAPI.describeUsers(mask)
    return outcome

  #############################################################################
  def csModifyGroup(self, groupname, properties, createIfNonExistant=False):
    """
    Apply changes to a group in the CS (delegates to CSAPI.modifyGroup).

    :param groupname: name of the group to modify
    :param properties: same kind of parameters as accepted when adding a group
    :param createIfNonExistant: forwarded to CSAPI.modifyGroup as third argument
    :return: result of CSAPI.modifyGroup, passed through unchanged
    """
    csAPI = self.csAPI
    return csAPI.modifyGroup(groupname, properties, createIfNonExistant)

  #############################################################################
  def csListHosts(self):
    """
    List the hosts known to the CS (delegates to CSAPI.listHosts).

    :return: result of CSAPI.listHosts, passed through unchanged
    """
    hosts = self.csAPI.listHosts()
    return hosts

  #############################################################################
  def csDescribeHosts(self, mask=False):
    """
    Get extended information for the hosts in the CS (delegates to CSAPI.describeHosts).

    :param mask: forwarded to CSAPI.describeHosts
    :return: result of CSAPI.describeHosts, passed through unchanged
    """
    csAPI = self.csAPI
    return csAPI.describeHosts(mask)

  #############################################################################
  def csModifyHost(self, hostname, properties, createIfNonExistant=False):
    """
    Apply changes to a host in the CS (delegates to CSAPI.modifyHost).

    :param hostname: name of the host to modify
    :param properties: same kind of parameters as accepted when adding a host
    :param createIfNonExistant: forwarded to CSAPI.modifyHost as third argument
    :return: result of CSAPI.modifyHost, passed through unchanged
    """
    outcome = self.csAPI.modifyHost(hostname, properties, createIfNonExistant)
    return outcome

  #############################################################################
  def csListGroups(self):
    """
    List the groups known to the CS (delegates to CSAPI.listGroups).

    :return: result of CSAPI.listGroups, passed through unchanged
    """
    groups = self.csAPI.listGroups()
    return groups

  #############################################################################
  def csDescribeGroups(self, mask=False):
    """
    Describe groups and their properties in the CS (delegates to CSAPI.describeGroups).

    :param mask: when given, only the groups in the mask are returned
    :return: result of CSAPI.describeGroups, passed through unchanged
    """
    csAPI = self.csAPI
    return csAPI.describeGroups(mask)

  #############################################################################
  def csSyncUsersWithCFG(self, usersCFG):
    """
    Synchronize the users in the CS with the given cfg (delegates to CSAPI.syncUsersWithCFG).

    :param usersCFG: cfg holding the users, forwarded unchanged
    :return: result of CSAPI.syncUsersWithCFG, passed through unchanged
    """
    outcome = self.csAPI.syncUsersWithCFG(usersCFG)
    return outcome

  #############################################################################
  def csCommitChanges(self, sortUsers=True):
    """
    Commit the pending changes to the CS (delegates to CSAPI.commitChanges).

    :param sortUsers: forwarded to CSAPI.commitChanges as its ``sortUsers`` keyword
    :type sortUsers: boolean
    :return: result of CSAPI.commitChanges, passed through unchanged
    """
    # Honour the caller's choice: the argument used to be ignored and
    # sortUsers=False was sent whatever value had been passed in.
    return self.csAPI.commitChanges(sortUsers=sortUsers)

  #############################################################################
  def sendMail(self, address, subject, body, fromAddress=None, localAttempt=True, html=False):
    """
    Send a mail through a NotificationClient.

    All arguments are forwarded positionally, in the order of this signature,
    to NotificationClient.sendMail and its result is returned unchanged.
    """
    return NotificationClient().sendMail(address, subject, body, fromAddress, localAttempt, html)

  #############################################################################
  def sendSMS(self, userName, body, fromAddress=None):
    """
    Send an SMS with the given body to a user through a NotificationClient.

    :param userName: recipient, forwarded to NotificationClient.sendSMS
    :param body: text of the message, at most 160 characters
    :param fromAddress: forwarded to NotificationClient.sendSMS
    :return: S_ERROR when the body is longer than 160 characters (no client is
             created in that case), otherwise the result of NotificationClient.sendSMS
    """
    if len(body) <= 160:
      return NotificationClient().sendSMS(userName, body, fromAddress)
    return S_ERROR('Exceeded maximum SMS length of 160 characters')

  #############################################################################
  def getBDIISite(self, site, host=None):
    """
    Look up a site in the BDII at host (delegates to ldapSite).

    :return: result of ldapSite, passed through unchanged
    """
    siteInfo = ldapSite(site, host=host)
    return siteInfo

  #############################################################################
  def getBDIICluster(self, ce, host=None):
    """
    Look up the cluster of a ce in the BDII at host (delegates to ldapCluster).

    :return: result of ldapCluster, passed through unchanged
    """
    clusterInfo = ldapCluster(ce, host=host)
    return clusterInfo

  #############################################################################
  def getBDIICE(self, ce, host=None):
    """
    Look up a ce in the BDII at host (delegates to ldapCE).

    :return: result of ldapCE, passed through unchanged
    """
    ceInfo = ldapCE(ce, host=host)
    return ceInfo

  #############################################################################
  def getBDIIService(self, ce, host=None):
    """
    Look up the service entry for ce in the BDII at host (delegates to ldapService).

    :return: result of ldapService, passed through unchanged
    """
    serviceInfo = ldapService(ce, host=host)
    return serviceInfo

  #############################################################################
  def getBDIICEState(self, ce, useVO=voName, host=None):
    """
    Look up the state of a ce in the BDII at host (delegates to ldapCEState).

    :param useVO: VO passed as second argument to ldapCEState; defaults to voName
    :return: result of ldapCEState, passed through unchanged
    """
    ceState = ldapCEState(ce, useVO, host=host)
    return ceState

  #############################################################################
  def getBDIICEVOView(self, ce, useVO=voName, host=None):
    """
    Look up the voview of a ce in the BDII at host (delegates to ldapCEVOView).

    :param useVO: VO passed as second argument to ldapCEVOView; defaults to voName
    :return: result of ldapCEVOView, passed through unchanged
    """
    voView = ldapCEVOView(ce, useVO, host=host)
    return voView

  #############################################################################
  def getBDIISE(self, site, useVO=voName, host=None):
    """
    Look up SE (SA) information for a site in the BDII at host (delegates to ldapSE).

    :param useVO: VO passed as second argument to ldapSE; defaults to voName
    :return: result of ldapSE, passed through unchanged
    """
    seInfo = ldapSE(site, useVO, host=host)
    return seInfo
示例#7
0
文件: Matcher.py 项目: vingar/DIRAC
class Matcher(object):
    """ Logic for matching
  """
    def __init__(self,
                 pilotAgentsDB=None,
                 jobDB=None,
                 tqDB=None,
                 jlDB=None,
                 opsHelper=None):
        """ Wire up the databases and helpers used for matching.

        Every argument left falsy is replaced by a freshly constructed default
        (PilotAgentsDB, JobDB, TaskQueueDB, JobLoggingDB and Operations respectively).
        """
        # "given or Default()" builds the default only when nothing usable was passed in
        self.pilotAgentsDB = pilotAgentsDB or PilotAgentsDB()
        self.jobDB = jobDB or JobDB()
        self.tqDB = tqDB or TaskQueueDB()
        self.jlDB = jlDB or JobLoggingDB()
        self.opsHelper = opsHelper or Operations()

        self.log = gLogger.getSubLogger("Matcher")
        # The limiter shares the job DB and operations helper selected above
        self.limiter = Limiter(jobDB=self.jobDB, opsHelper=self.opsHelper)
        self.siteClient = SiteStatus()

    def selectJob(self, resourceDescription, credDict):
        """ Main job selection function: find the highest priority job matching the resource capacity

        :param resourceDescription: description of the resource asking for a job
        :param credDict: credentials of the requester
        :return: {} when no job matches, otherwise a dict holding JDL, JobID, DN, Group,
                 PilotInfoReportedFlag and, when available, the job's optional parameters
        :raise RuntimeError: when the task queue match, the job attributes or the JDL
                             cannot be obtained, or the matched job is not in Waiting state
        """
        startTime = time.time()

        resourceDict = self._getResourceDict(resourceDescription, credDict)

        # Build a trimmed copy of the matching parameters, only used for the log line
        toPrintDict = dict(resourceDict)
        for field in ("MaxRAM", "NumberOfProcessors"):
            if field in resourceDescription:
                toPrintDict[field] = resourceDescription[field]
        # Tags ending in GB / Processors are left out of the printout
        shownTags = []
        if "Tag" in resourceDict:
            shownTags = [tag for tag in resourceDict['Tag']
                         if not tag.endswith(('GB', 'Processors'))]
        if shownTags:
            toPrintDict['Tag'] = shownTags
        else:
            toPrintDict.pop('Tag', None)
        gLogger.info('Resource description for matching',
                     printDict(toPrintDict))

        negativeCond = self.limiter.getNegativeCondForSite(resourceDict['Site'])
        matchResult = self.tqDB.matchAndGetJob(resourceDict,
                                               negativeCond=negativeCond)
        if not matchResult['OK']:
            raise RuntimeError(matchResult['Message'])
        matchInfo = matchResult['Value']
        if not matchInfo['matchFound']:
            self.log.info("No match found")
            return {}

        jobID = matchInfo['jobId']
        statusAtt = self.jobDB.getJobAttributes(
            jobID, ['OwnerDN', 'OwnerGroup', 'Status'])
        if not statusAtt['OK']:
            raise RuntimeError('Could not retrieve job attributes')
        if not statusAtt['Value']:
            raise RuntimeError("No attributes returned for job")
        if statusAtt['Value']['Status'] != 'Waiting':
            # The task queue is out of sync with the job DB: drop the job from the
            # queue and refuse the match
            self.log.error('Job matched by the TQ is not in Waiting state',
                           str(jobID))
            deletion = self.tqDB.deleteJob(jobID)
            if not deletion['OK']:
                raise RuntimeError(deletion['Message'])
            raise RuntimeError("Job %s is not in Waiting state" % str(jobID))

        self._reportStatus(resourceDict, jobID)

        jdlResult = self.jobDB.getJobJDL(jobID)
        if not jdlResult['OK']:
            raise RuntimeError("Failed to get the job JDL")

        resultDict = {'JDL': jdlResult['Value'], 'JobID': jobID}

        matchTime = time.time() - startTime
        self.log.info("Match time: [%s]" % str(matchTime))
        gMonitor.addMark("matchTime", matchTime)

        # Optional job parameters are merged into the answer; a failure here is ignored
        resOpt = self.jobDB.getJobOptParameters(jobID)
        if resOpt['OK']:
            resultDict.update(resOpt['Value'])
        ownerAtt = self.jobDB.getJobAttributes(jobID, ['OwnerDN', 'OwnerGroup'])
        if not ownerAtt['OK']:
            raise RuntimeError('Could not retrieve job attributes')
        if not ownerAtt['Value']:
            raise RuntimeError('No attributes returned for job')

        if self.opsHelper.getValue("JobScheduling/CheckMatchingDelay", True):
            self.limiter.updateDelayCounters(resourceDict['Site'], jobID)

        # Pilot info is reported only if the caller has not flagged it as already done
        if not resourceDict.get('PilotInfoReportedFlag', False):
            self._updatePilotInfo(resourceDict)
        self._updatePilotJobMapping(resourceDict, jobID)

        resultDict['DN'] = ownerAtt['Value']['OwnerDN']
        resultDict['Group'] = ownerAtt['Value']['OwnerGroup']
        resultDict['PilotInfoReportedFlag'] = True

        return resultDict

    def _getResourceDict(self, resourceDescription, credDict):
        """ Turn a resourceDescription into the resourceDict used for matching.

        The description goes through _processResourceDescription and
        _checkCredentials, then _checkPilotVersion is run on the outcome. When
        _checkMask returns False the JobType is forced to 'Test'.
        """
        resourceDict = self._checkCredentials(
            self._processResourceDescription(resourceDescription), credDict)
        self._checkPilotVersion(resourceDict)

        siteAllowed = self._checkMask(resourceDict)
        if not siteAllowed:
            # Banned destinations can only take Test jobs
            resourceDict['JobType'] = 'Test'

        self.log.verbose("Resource description:")
        for key, value in resourceDict.items():
            self.log.verbose("%s : %s" % (key.rjust(20), value))

        return resourceDict

    def _processResourceDescription(self, resourceDescription):
        """ Check and form the resource description dictionary

        resourceDescription is a ceDict coming from a JobAgent, for example.

        Copied over: the fields named in singleValueDefFields and multiValueMatchFields,
        the non-empty fields named in tagMatchFields together with their
        'Required<name>' variants, 'JobID' and a fixed set of pilot/release keys.
        MaxRAM (divided by 1000) and NumberOfProcessors, when not larger than 128, are
        expanded into the tags '2GB'..'<n>GB' and '2Processors'..'<n>Processors'.
        A 'WholeNode' key adds the 'WholeNode' tag. Duplicate tags are removed.

        :param resourceDescription: dict-like description of the resource
        :return: dict with the parameters to be used for matching
        """

        resourceDict = {}
        # "in" replaces the Python-2-only has_key(), in line with the other lookups here
        for fieldNames in (singleValueDefFields, multiValueMatchFields):
            for name in fieldNames:
                if name in resourceDescription:
                    resourceDict[name] = resourceDescription[name]

        for name in tagMatchFields:
            if name in resourceDescription and resourceDescription[name]:
                resourceDict[name] = resourceDescription[name]
            rname = 'Required%s' % name
            if rname in resourceDescription:
                resourceDict[rname] = resourceDescription[rname]

        if 'JobID' in resourceDescription:
            resourceDict['JobID'] = resourceDescription['JobID']

        # Convert MaxRAM and NumberOfProcessors parameters into a list of tags
        maxRAM = resourceDescription.get('MaxRAM')
        if maxRAM:
            try:
                # Explicit floor division: the result must stay an integer for range()
                # below, whatever the division semantics of the interpreter
                maxRAM = int(maxRAM) // 1000
            except ValueError:
                maxRAM = None
        nProcessors = resourceDescription.get('NumberOfProcessors')
        if nProcessors:
            try:
                nProcessors = int(nProcessors)
            except ValueError:
                nProcessors = None
        for param, key in [(maxRAM, 'GB'), (nProcessors, 'Processors')]:
            if param and param <= 128:
                paramTags = ['%d%s' % (par, key) for par in range(2, param + 1)]
                if paramTags:
                    resourceDict.setdefault("Tag", []).extend(paramTags)

        if "WholeNode" in resourceDescription:
            resourceDict.setdefault("Tag", []).append("WholeNode")

        if 'Tag' in resourceDict:
            resourceDict['Tag'] = list(set(resourceDict['Tag']))

        for k in ('DIRACVersion', 'ReleaseVersion', 'ReleaseProject',
                  'VirtualOrganization', 'PilotReference', 'PilotBenchmark',
                  'PilotInfoReportedFlag'):
            if k in resourceDescription:
                resourceDict[k] = resourceDescription[k]

        return resourceDict

    def _reportStatus(self, resourceDict, jobID):
        """ Reports the status of the matched job in jobDB and jobLoggingDB

        Do not fail if errors happen here
    """
        attNames = ['Status', 'MinorStatus', 'ApplicationStatus', 'Site']
        attValues = ['Matched', 'Assigned', 'Unknown', resourceDict['Site']]
        result = self.jobDB.setJobAttributes(jobID, attNames, attValues)
        if not result['OK']:
            self.log.error(
                "Problem reporting job status",
                "setJobAttributes, jobID = %s: %s" %
                (jobID, result['Message']))
        else:
            self.log.verbose("Set job attributes for jobID %s" % jobID)

        result = self.jlDB.addLoggingRecord(jobID,
                                            status='Matched',
                                            minor='Assigned',
                                            source='Matcher')
        if not result['OK']:
            self.log.error(
                "Problem reporting job status",
                "addLoggingRecord, jobID = %s: %s" %
                (jobID, result['Message']))
        else:
            self.log.verbose("Added logging record for jobID %s" % jobID)

    def _checkMask(self, resourceDict):
        """ Check the mask: are we allowed to run normal jobs?

        FIXME: should we move to site OR SE?
    """
        if not 'Site' in resourceDict:
            self.log.error("Missing Site Name in Resource JDL")
            raise RuntimeError("Missing Site Name in Resource JDL")

        # Get common site mask and check the agent site
        result = self.siteClient.getSites(siteState='Active')
        if not result['OK']:
            self.log.error("Internal error",
                           "getSiteMask: %s" % result['Message'])
            raise RuntimeError("Internal error")
        maskList = result['Value']

        if resourceDict['Site'] not in maskList:
            return False

        return True

    def _updatePilotInfo(self, resourceDict):
        """ Update pilot information - do not fail if we don't manage to do it
    """
        pilotReference = resourceDict.get('PilotReference', '')
        if pilotReference:
            gridCE = resourceDict.get('GridCE', 'Unknown')
            site = resourceDict.get('Site', 'Unknown')
            benchmark = resourceDict.get('PilotBenchmark', 0.0)
            self.log.verbose(
                'Reporting pilot info for %s: gridCE=%s, site=%s, benchmark=%f'
                % (pilotReference, gridCE, site, benchmark))

            result = self.pilotAgentsDB.setPilotStatus(pilotReference,
                                                       status='Running',
                                                       gridSite=site,
                                                       destination=gridCE,
                                                       benchmark=benchmark)
            if not result['OK']:
                self.log.warn(
                    "Problem updating pilot information",
                    "; setPilotStatus. pilotReference: %s; %s" %
                    (pilotReference, result['Message']))

    def _updatePilotJobMapping(self, resourceDict, jobID):
        """ Update pilot to job mapping information
    """
        pilotReference = resourceDict.get('PilotReference', '')
        if pilotReference:
            result = self.pilotAgentsDB.setCurrentJobID(pilotReference, jobID)
            if not result['OK']:
                self.log.error(
                    "Problem updating pilot information",
                    ";setCurrentJobID. pilotReference: %s; %s" %
                    (pilotReference, result['Message']))
            result = self.pilotAgentsDB.setJobForPilot(jobID,
                                                       pilotReference,
                                                       updateStatus=False)
            if not result['OK']:
                self.log.error(
                    "Problem updating pilot information",
                    "; setJobForPilot. pilotReference: %s; %s" %
                    (pilotReference, result['Message']))

    def _checkCredentials(self, resourceDict, credDict):
        """ Restrict resourceDict to the jobs the given credentials may take

        Depending on the properties in credDict, 'OwnerDN' and/or 'OwnerGroup' are set
        in resourceDict.

        :return: the same resourceDict, updated in place
        :raise RuntimeError: when a Registry lookup returns an error
        """
        properties = credDict['properties']

        if Properties.GENERIC_PILOT in properties:
            # You can only match groups in the same VO
            if credDict['group'] == "hosts":
                # for the host case the VirtualOrganization parameter
                # is mandatory in resourceDict
                vo = resourceDict.get('VirtualOrganization', '')
            else:
                vo = Registry.getVOForGroup(credDict['group'])
            voGroups = Registry.getGroupsForVO(vo)
            if not voGroups['OK']:
                raise RuntimeError(voGroups['Message'])
            resourceDict['OwnerGroup'] = voGroups['Value']
            return resourceDict

        if Properties.PILOT in properties:
            # If it's a private pilot, the DN has to be the same
            self.log.notice(
                "Setting the resource DN to the credentials DN")
            resourceDict['OwnerDN'] = credDict['DN']
        elif Properties.JOB_SHARING in properties:
            # Job sharing: the group has to be the same, and a DN given by the
            # resource (if any) must belong to that group
            resourceDict['OwnerGroup'] = credDict['group']
            self.log.notice(
                "Setting the resource group to the credentials group")
            if 'OwnerDN' in resourceDict and resourceDict['OwnerDN'] != credDict['DN']:
                ownerDN = resourceDict['OwnerDN']
                dnGroups = Registry.getGroupsForDN(ownerDN)
                if not dnGroups['OK']:
                    raise RuntimeError(dnGroups['Message'])
                if credDict['group'] not in dnGroups['Value']:
                    # DN is not in the same group: fall back to the requester's own DN
                    self.log.notice(
                        "You cannot request jobs from DN %s. It does not belong to your group!"
                        % ownerDN)
                    resourceDict['OwnerDN'] = credDict['DN']
        else:
            # Nothing special, group and DN have to be the same
            resourceDict['OwnerDN'] = credDict['DN']
            resourceDict['OwnerGroup'] = credDict['group']

        return resourceDict

    def _checkPilotVersion(self, resourceDict):
        """ Check the pilot DIRAC version
    """
        if self.opsHelper.getValue("Pilot/CheckVersion", True):
            if 'ReleaseVersion' not in resourceDict:
                if not 'DIRACVersion' in resourceDict:
                    raise RuntimeError(
                        'Version check requested and not provided by Pilot')
                else:
                    pilotVersion = resourceDict['DIRACVersion']
            else:
                pilotVersion = resourceDict['ReleaseVersion']

            validVersions = self.opsHelper.getValue("Pilot/Version", [])
            if validVersions and pilotVersion not in validVersions:
                raise RuntimeError( 'Pilot version does not match the production version %s not in ( %s )' % \
                                    ( pilotVersion, ",".join( validVersions ) ) )
            # Check project if requested
            validProject = self.opsHelper.getValue("Pilot/Project", "")
            if validProject:
                if 'ReleaseProject' not in resourceDict:
                    raise RuntimeError(
                        "Version check requested but expected project %s not received"
                        % validProject)
                if resourceDict['ReleaseProject'] != validProject:
                    raise RuntimeError(
                        "Version check requested \
          but expected project %s != received %s" %
                        (validProject, resourceDict['ReleaseProject']))