示例#1
0
  def test_CreateAndSubmit( self ):
    """ Create a job wrapper for a sample job and submit it to an InProcess CE.

        The test succeeds when the CE creation, the wrapper creation and the
        submission each return a result whose 'OK' entry is true.
    """

    jobParams = {'JobID': '1',
                 'JobType': 'Merge',
                 'CPUTime': '1000000',
                 'Executable': '$DIRACROOT/scripts/dirac-jobexec',
                 'Arguments': "helloWorld.xml -o LogLevel=DEBUG pilot.cfg",
                 'ExtraOptions': 'pilot.cfg',
                 'InputSandbox': ['helloWorld.xml', 'exe-script.py']}
    resourceParams = {}
    optimizerParams = {}

    # assertTrue is used instead of the deprecated assert_ alias, which newer
    # unittest versions no longer provide
    ceFactory = ComputingElementFactory()
    ceInstance = ceFactory.getCE( 'InProcess' )
    self.assertTrue( ceInstance['OK'] )
    computingElement = ceInstance['Value']

    # The wrapper is generated with pilot.cfg passed as extra options
    res = createJobWrapper( 2, jobParams, resourceParams, optimizerParams, extraOptions = 'pilot.cfg', logLevel = 'DEBUG' )
    self.assertTrue( res['OK'] )
    wrapperFile = res['Value']

    res = computingElement.submitJob( wrapperFile, self.payloadProxy )
    self.assertTrue( res['OK'] )
  def export_killPilot(self, pilotRefList ):
    """ Kill the specified pilots

        :param pilotRefList: one pilot reference (string) or an iterable of
                             pilot references
        :return: S_OK() when every kill request succeeded; otherwise S_ERROR,
                 or the failed result of the queue or CE lookup
    """
    # Make a list if it is not yet. The string check comes first so that a
    # single reference is never split into characters by list().
    if isinstance( pilotRefList, StringTypes ):
      pilotRefs = [pilotRefList]
    else:
      pilotRefs = list( pilotRefList )

    # Regroup pilots per site and per owner
    pilotRefDict = {}
    for pilotReference in pilotRefs:
      result = pilotDB.getPilotInfo(pilotReference)
      if not result['OK'] or not result[ 'Value' ]:
        return S_ERROR('Failed to get info for pilot ' + pilotReference)

      pilotDict = result['Value'][pilotReference]
      owner = pilotDict['OwnerDN']
      group = pilotDict['OwnerGroup']
      # The grouping key is split again on '@@@' when the kills are issued
      queue = '@@@'.join( [owner, group, pilotDict['GridSite'], pilotDict['DestinationSite'], pilotDict['Queue']] )
      gridType = pilotDict['GridType']
      queueInfo = pilotRefDict.setdefault( queue, {} )
      queueInfo.setdefault( 'PilotList', [] ).append( pilotReference )
      queueInfo['GridType'] = gridType

    # Do the work now queue by queue
    ceFactory = ComputingElementFactory()
    failed = []
    for key, pilotDict in pilotRefDict.items():

      owner,group,site,ce,queue = key.split( '@@@' )
      result = getQueue( site, ce, queue )
      if not result['OK']:
        return result
      queueDict = result['Value']
      gridType = pilotDict['GridType']
      result = ceFactory.getCE( gridType, ce, queueDict )
      if not result['OK']:
        return result
      ce = result['Value']

      # These grid types need the proxy of the pilot owner to act on the pilots
      if gridType in ["LCG","gLite","CREAM"]:
        group = getGroupOption(group,'VOMSRole',group)
        ret = gProxyManager.getPilotProxyFromVOMSGroup( owner, group )
        if not ret['OK']:
          gLogger.error( ret['Message'] )
          gLogger.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( owner, group ) )
          return S_ERROR("Failed to get the pilot's owner proxy")
        proxy = ret['Value']
        ce.setProxy( proxy )

      pilotList = pilotDict['PilotList']
      result = ce.killJob( pilotList )
      if not result['OK']:
        # Carry on with the remaining queues, but keep a trace of the reason
        gLogger.error( 'Failed to kill pilots:', result['Message'] )
        failed.extend( pilotList )

    if failed:
      return S_ERROR('Failed to kill at least some pilots')

    return S_OK()
示例#3
0
    def initialize(self, loops=0):
        """Read the agent configuration and instantiate the backend CE.

        :param loops: value stored in the 'MaxCycles' agent option
        :return: S_OK() on success; otherwise the failed result of the CE
                 creation or of the CE description lookup
        """
        # Monitoring is switched off for this agent
        self.am_setOption('MonitoringEnabled', False)
        self.am_setOption('MaxCycles', loops)

        # A CE defined in the local site configuration takes precedence over
        # the agent's 'CEType' option
        backendName = self.am_getOption('CEType', 'InProcess')
        localCE = gConfig.getValue('/LocalSite/LocalCE', '')
        if localCE:
            self.log.info('Defining CE from local configuration = %s' % localCE)
            backendName = localCE

        # Build the backend Computing Element
        factory = ComputingElementFactory()
        self.ceName = backendName
        creation = factory.getCE(backendName)
        if not creation['OK']:
            self.log.warn(creation['Message'])
            return creation
        self.computingElement = creation['Value']

        description = self.computingElement.getDescription()
        if not description['OK']:
            self.log.warn("Can not get the CE description")
            return description
        # CPU time from the CE description, then the CS value when defined
        self.timeLeft = description['Value'].get('CPUTime', self.timeLeft)
        self.timeLeft = gConfig.getValue(
            '/Resources/Computing/CEDefaults/MaxCPUTime', self.timeLeft)

        self.initTimes = os.times()

        # Every option below falls back to the current value of its attribute.
        csOptions = (
            ('siteName', '/LocalSite/Site'),
            ('pilotReference', '/LocalSite/PilotReference'),
            ('defaultProxyLength', '/Registry/DefaultProxyLifeTime'),
            # Factor to convert raw CPU to normalized units (based on the CPU model)
            ('cpuFactor', '/LocalSite/CPUNormalizationFactor'),
        )
        for attribute, csPath in csOptions:
            setattr(self, attribute,
                    gConfig.getValue(csPath, getattr(self, attribute)))

        agentOptions = (
            ('jobSubmissionDelay', 'SubmissionDelay'),
            ('fillingMode', 'FillingModeFlag'),
            ('minimumTimeLeft', 'MinimumTimeLeft'),
            ('stopOnApplicationFailure', 'StopOnApplicationFailure'),
            ('stopAfterFailedMatches', 'StopAfterFailedMatches'),
        )
        for attribute, optionName in agentOptions:
            setattr(self, attribute,
                    self.am_getOption(optionName, getattr(self, attribute)))

        self.extraOptions = gConfig.getValue(
            '/AgentJobRequirements/ExtraOptions', self.extraOptions)

        # Helper object used for the time left evaluation
        self.timeLeftUtil = TimeLeft()
        return S_OK()
示例#4
0
  def getQueues( self, resourceDict ):
    """ Rebuild self.queueDict from the CE and queue descriptions in resourceDict

        One entry is created per '<ce>_<queue>'. It holds the queue parameters
        ('ParametersDict'), the CE object ('CE'), 'CEName', 'CEType', 'Site',
        'QueueName' and, when the queue defines that option, 'BundleProxy'.
        The 'Queues' entry is popped from each CE description and the queue
        parameter dictionaries of resourceDict are updated in place.

        :param resourceDict: mapping site -> CE name -> CE description
        :return: S_OK(), or the failed result of the CE creation or validation
    """

    self.queueDict = {}
    ceFactory = ComputingElementFactory()

    for site in resourceDict:
      for ce in resourceDict[site]:
        ceDict = resourceDict[site][ce]
        qDict = ceDict.pop( 'Queues' )
        for queue in qDict:
          queueName = '%s_%s' % ( ce, queue )
          # params is the very dictionary stored in qDict, not a copy
          params = qDict[queue]
          queueInfo = { 'ParametersDict': params }
          self.queueDict[queueName] = queueInfo

          params['Queue'] = queue
          params['Site'] = site
          params['GridEnv'] = self.gridEnv
          params['Setup'] = gConfig.getValue( '/DIRAC/Setup', 'unknown' )

          # Evaluate the CPU limit of the queue according to the Glue convention
          # To Do: should be a utility
          if "maxCPUTime" in params and "SI00" in params:
            # Clamp the value: some sites publish crazy numbers in the CS
            maxCPUTime = min( max( float( params['maxCPUTime'] ), 0 ), 86400 * 12.5 )
            si00 = float( params['SI00'] )
            params['CPUTime'] = int( 60. / 250. * maxCPUTime * si00 )

          # Each queue works in its own sub-directory of the working directory
          qwDir = os.path.join( self.workingDirectory, queue )
          if not os.path.exists( qwDir ):
            os.makedirs( qwDir )
          params['WorkingDirectory'] = qwDir

          # CE level options, overridden by the queue level ones
          ceQueueDict = dict( ceDict )
          ceQueueDict.update( params )
          result = ceFactory.getCE( ceName = ce,
                                    ceType = ceDict['CEType'],
                                    ceParametersDict = ceQueueDict )
          if not result['OK']:
            return result
          queueCE = result['Value']
          queueInfo['CE'] = queueCE
          queueInfo['CEName'] = ce
          queueInfo['CEType'] = ceDict['CEType']
          queueInfo['Site'] = site
          queueInfo['QueueName'] = queue

          result = queueCE.isValid()
          if not result['OK']:
            self.log.fatal( result['Message'] )
            return result

          # The presence of the option is enough, its value is not inspected
          if 'BundleProxy' in params:
            queueInfo['BundleProxy'] = True

    return S_OK()
示例#5
0
  def __createCE( self, ceName ):
    """
    Ask the ComputingElementFactory for the CE called ceName.

    :param ceName: name handed over to ComputingElementFactory.getCE
    :return: the result of getCE, unchanged; a failure is also logged as a
             warning
    """
    self.log.info( "Creating %s CE" % ceName )

    outcome = ComputingElementFactory().getCE( ceName )
    if not outcome['OK']:
      self.log.warn( outcome['Message'] )
    return outcome
示例#6
0
  def initialize( self, loops = 0 ):
    """ Read the agent configuration, instantiate the backend CE and reset the
        counters and flags of the agent.

        :param loops: value stored in the 'MaxCycles' agent option
        :return: S_OK() on success; otherwise the failed result of the CE
                 creation or of the CE description lookup
    """
    # Monitoring is switched off for this agent
    self.am_setOption( 'MonitoringEnabled', False )
    self.am_setOption( 'MaxCycles', loops )

    # A CE defined in the local site configuration takes precedence over the
    # agent's 'CEType' option
    backendName = self.am_getOption( 'CEType', 'InProcess' )
    localCE = gConfig.getValue( '/LocalSite/LocalCE', '' )
    if localCE:
      self.log.info( 'Defining CE from local configuration = %s' % localCE )
      backendName = localCE

    # Build the backend Computing Element
    factory = ComputingElementFactory()
    self.ceName = backendName
    creation = factory.getCE( backendName )
    if not creation['OK']:
      self.log.warn( creation['Message'] )
      return creation
    self.computingElement = creation['Value']

    description = self.computingElement.getDescription()
    if not description['OK']:
      self.log.warn( "Can not get the CE description" )
      return description
    # CPU time from the CE description, then the CS value when defined
    self.timeLeft = description['Value'].get( 'CPUTime', 0.0 )
    self.timeLeft = gConfig.getValue( '/Resources/Computing/CEDefaults/MaxCPUTime', self.timeLeft )

    self.initTimes = os.times()

    # ( attribute, CS path, default ) for the values read from the CS
    csOptions = ( ( 'siteName', '/LocalSite/Site', 'Unknown' ),
                  ( 'pilotReference', '/LocalSite/PilotReference', 'Unknown' ),
                  ( 'defaultProxyLength', '/Registry/DefaultProxyLifeTime', 86400 * 5 ),
                  # Factor to convert raw CPU to normalized units (based on the CPU model)
                  ( 'cpuFactor', '/LocalSite/CPUNormalizationFactor', 0.0 ) )
    for attribute, csPath, default in csOptions:
      setattr( self, attribute, gConfig.getValue( csPath, default ) )

    # ( attribute, option name, default ) for the agent options
    agentOptions = ( ( 'jobSubmissionDelay', 'SubmissionDelay', 10 ),
                     ( 'fillingMode', 'FillingModeFlag', False ),
                     ( 'minimumTimeLeft', 'MinimumTimeLeft', 1000 ),
                     ( 'stopOnApplicationFailure', 'StopOnApplicationFailure', True ),
                     ( 'stopAfterFailedMatches', 'StopAfterFailedMatches', 10 ) )
    for attribute, optionName, default in agentOptions:
      setattr( self, attribute, self.am_getOption( optionName, default ) )

    self.jobCount = 0
    self.matchFailedCount = 0
    self.extraOptions = gConfig.getValue( '/AgentJobRequirements/ExtraOptions', '' )

    # Helper object and state used for the time left evaluation
    self.timeLeftUtil = TimeLeft()
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False
    return S_OK()
示例#7
0
    def initialize(self, loops=0):
        """Set up the agent: options, backend CE, counters and flags.

        :param loops: value stored in the 'MaxCycles' agent option
        :return: S_OK() on success, the failed CE creation result otherwise
        """
        # Monitoring is switched off for this agent
        self.am_setOption('MonitoringEnabled', False)
        self.am_setOption('MaxCycles', loops)

        configuredType = self.am_getOption('CEType', 'InProcess')
        localCE = gConfig.getValue('/LocalSite/LocalCE', '')
        if localCE:
            self.log.info('Defining CE from local configuration = %s' % localCE)
        # A CE defined for the local site overrides the agent option
        ceType = localCE or configuredType

        factory = ComputingElementFactory()
        self.ceName = ceType
        creation = factory.getCE(ceType)
        if not creation['OK']:
            self.log.warn(creation['Message'])
            return creation

        self.initTimes = os.times()
        self.computingElement = creation['Value']

        # Values read from the CS
        self.siteName = gConfig.getValue('/LocalSite/Site', 'Unknown')
        self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', 'Unknown')
        self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', 86400 * 5)
        # Factor to convert raw CPU to normalized units (based on the CPU model)
        self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)

        # Agent options
        self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', 10)
        self.fillingMode = self.am_getOption('FillingModeFlag', False)
        self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', True)
        self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', 10)
        self.jobCount = self.matchFailedCount = 0
        self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions', '')

        # Helper object and state used for the time left evaluation
        self.timeLeftUtil = TimeLeft()
        self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', 0.0)
        self.timeLeftError = ''
        self.scaledCPUTime = 0.0
        self.pilotInfoReportedFlag = False
        return S_OK()
示例#8
0
  def initialize( self, loops = 0 ):
    """ Set up the agent: options, backend CE, job wrapper template location,
        counters and flags.

        :param loops: value stored in the 'MaxCycles' agent option
        :return: S_OK() on success, the failed CE creation result otherwise
    """
    # Monitoring is switched off for this agent
    self.am_setOption( 'MonitoringEnabled', False )
    self.am_setOption( 'MaxCycles', loops )

    configuredType = self.am_getOption( 'CEType', 'InProcess' )
    localCE = gConfig.getValue( '/LocalSite/LocalCE', '' )
    if localCE:
      self.log.info( 'Defining CE from local configuration = %s' % localCE )
    # A CE defined for the local site overrides the agent option
    ceType = localCE or configuredType

    factory = ComputingElementFactory()
    self.ceName = ceType
    creation = factory.getCE( ceType )
    if not creation['OK']:
      self.log.warn( creation['Message'] )
      return creation

    self.initTimes = os.times()
    self.computingElement = creation['Value']

    # The root is taken four directory levels above this file
    rootDir = __file__
    for _ in range( 4 ):
      rootDir = os.path.dirname( rootDir )
    self.diracRoot = rootDir

    # Values read from the CS
    self.siteRoot = gConfig.getValue( '/LocalSite/Root', os.getcwd() )
    self.siteName = gConfig.getValue( '/LocalSite/Site', 'Unknown' )
    self.pilotReference = gConfig.getValue( '/LocalSite/PilotReference', 'Unknown' )
    self.defaultProxyLength = gConfig.getValue( '/Registry/DefaultProxyLifeTime', 86400 * 5 )
    # Factor to convert raw CPU to normalized units (based on the CPU model)
    self.cpuFactor = gConfig.getValue( '/LocalSite/CPUNormalizationFactor', 0.0 )

    # Agent options; the wrapper template option is joined to self.diracRoot
    defaultWrapperLocation = 'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py'
    wrapperLocation = self.am_getOption( 'JobWrapperTemplate', defaultWrapperLocation )
    self.jobWrapperTemplate = os.path.join( self.diracRoot, wrapperLocation )
    self.jobSubmissionDelay = self.am_getOption( 'SubmissionDelay', 10 )
    self.defaultLogLevel = self.am_getOption( 'DefaultLogLevel', 'info' )
    self.fillingMode = self.am_getOption( 'FillingModeFlag', False )
    self.stopOnApplicationFailure = self.am_getOption( 'StopOnApplicationFailure', True )
    self.stopAfterFailedMatches = self.am_getOption( 'StopAfterFailedMatches', 10 )
    self.jobCount = self.matchFailedCount = 0

    # Helper object and state used for the time left evaluation
    self.timeLeftUtil = TimeLeft()
    self.timeLeft = gConfig.getValue( '/Resources/Computing/CEDefaults/MaxCPUTime', 0.0 )
    self.gridCEQueue = gConfig.getValue( '/Resources/Computing/CEDefaults/GridCEQueue', '' )
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False
    return S_OK()
示例#9
0
 def addComputingElement(self, ceList):
   """
     Make sure self.computingElementDict has an entry for every CE of ceList.

     For a CE that is not registered yet a CE object is created; its
     ceConfigDict is stored under the CE name and the CE object itself is
     added to that dictionary under the 'CE' key. Processing stops at the
     first CE that cannot be created (the error is logged, None is returned).
   """
   for ceName in ceList:
     if ceName in self.computingElementDict:
       continue
     creation = ComputingElementFactory( ).getCE( ceName = ceName )
     if not creation['OK']:
       self.log.error('Can not create CE object:', creation['Message'])
       return
     ceObject = creation['Value']
     ceConfig = ceObject.ceConfigDict
     self.computingElementDict[ceName] = ceConfig
     # the 'CE' instance is added last so that it is not overwritten
     ceConfig['CE'] = ceObject
示例#10
0
 def addComputingElement(self, ceList):
   """
     Register in self.computingElementDict the CEs of ceList that are missing.

     The entry of a new CE is the ceConfigDict of a freshly created CE
     object, extended with the object itself under the 'CE' key. The method
     logs an error and returns as soon as one CE object cannot be created.
   """
   for ceName in ceList:
     if ceName in self.computingElementDict:
       # already known, nothing to instantiate
       continue
     outcome = ComputingElementFactory( ).getCE( ceName = ceName )
     if not outcome['OK']:
       self.log.error('Can not create CE object:', outcome['Message'])
       return
     newCE = outcome['Value']
     entry = newCE.ceConfigDict
     self.computingElementDict[ceName] = entry
     # set the 'CE' key last so that the instance is not overwritten
     entry['CE'] = newCE
示例#11
0
  def getQueues( self, resourceDict ):
    """ Get the list of relevant CEs and their descriptions

        Rebuilds self.queueDict with one entry per '<ce>_<queue>' found in
        resourceDict. Each entry holds the queue parameters ('ParametersDict'),
        the CE object ('CE'), 'CEName', 'CEType', 'Site', 'QueueName',
        'Platform' and, when requested by the queue or CE options,
        'BundleProxy'. CE objects are reused from self.queueCECache as long as
        the hash of the merged CE/queue description does not change. New
        platforms and sites are appended to self.platforms and self.sites.

        Note that the 'Queues' entry is popped from every CE description and
        that the queue parameter dictionaries of resourceDict are updated in
        place.

        :param resourceDict: mapping site -> CE name -> CE description, where
                             each CE description has a 'Queues' mapping and a
                             'CEType'
        :return: S_OK(), or the failed result of the CE creation or validation
    """

    self.queueDict = {}
    ceFactory = ComputingElementFactory()

    for site in resourceDict:
      for ce in resourceDict[site]:
        ceDict = resourceDict[site][ce]
        ceTags = ceDict.get( 'Tag' )
        if isinstance( ceTags, basestring ):
          ceTags = fromChar( ceTags )
        qDict = ceDict.pop( 'Queues' )
        for queue in qDict:
          queueName = '%s_%s' % ( ce, queue )
          # queueParams is the very dictionary stored in qDict, not a copy
          queueParams = qDict[queue]
          queueInfo = { 'ParametersDict': queueParams }
          self.queueDict[queueName] = queueInfo

          queueParams['Queue'] = queue
          queueParams['Site'] = site
          queueParams['GridEnv'] = self.gridEnv
          queueParams['Setup'] = gConfig.getValue( '/DIRAC/Setup', 'unknown' )
          # Evaluate the CPU limit of the queue according to the Glue convention
          # To Do: should be a utility
          if "maxCPUTime" in queueParams and "SI00" in queueParams:
            maxCPUTime = float( queueParams['maxCPUTime'] )
            # For some sites there are crazy values in the CS
            maxCPUTime = max( maxCPUTime, 0 )
            maxCPUTime = min( maxCPUTime, 86400 * 12.5 )
            si00 = float( queueParams['SI00'] )
            queueCPUTime = 60. / 250. * maxCPUTime * si00
            queueParams['CPUTime'] = int( queueCPUTime )

          # Merge the CE level tags with the queue level ones
          queueTags = queueParams.get( 'Tag' )
          if queueTags and isinstance( queueTags, basestring ):
            queueTags = fromChar( queueTags )
            queueParams['Tag'] = queueTags
          if ceTags:
            if queueTags:
              queueParams['Tag'] = list( set( ceTags + queueTags ) )
            else:
              # Store a copy: the memory tags are appended in place further
              # down and must not end up in ceTags, which is shared by all
              # the queues of this CE
              queueParams['Tag'] = list( ceTags )

          maxMemory = queueParams.get( 'MaxRAM', None )
          if maxMemory:
            # MaxRAM value is supposed to be in MB
            # Floor division keeps the upper bound an integer for range()
            maxMemoryList = range( 1, int( maxMemory ) // 1000 + 1 )
            memoryTags = [ '%dGB' % mem for mem in maxMemoryList ]
            if memoryTags:
              queueParams.setdefault( 'Tag', [] )
              queueParams['Tag'] += memoryTags

          # Each queue works in its own sub-directory of the working directory
          qwDir = os.path.join( self.workingDirectory, queue )
          if not os.path.exists( qwDir ):
            os.makedirs( qwDir )
          queueParams['WorkingDirectory'] = qwDir

          # Platform: queue option first, then CE option, then built from the
          # CE 'architecture' (x86_64 when missing) and 'OS' options
          platform = ''
          if "Platform" in queueParams:
            platform = queueParams['Platform']
          elif "Platform" in ceDict:
            platform = ceDict['Platform']
          elif "OS" in ceDict:
            architecture = ceDict.get( 'architecture', 'x86_64' )
            OS = ceDict['OS']
            platform = '_'.join( [architecture, OS] )
          if platform and platform not in self.platforms:
            self.platforms.append( platform )

          if "Platform" not in queueParams and platform:
            result = Resources.getDIRACPlatform( platform )
            if result['OK']:
              queueParams['Platform'] = result['Value'][0]

          # CE level options, overridden by the queue level ones
          ceQueueDict = dict( ceDict )
          ceQueueDict.update( queueParams )

          # Generate the CE object for the queue or pick the already existing one
          # if the queue definition did not change
          queueHash = self.__generateQueueHash( ceQueueDict )
          if queueName in self.queueCECache and self.queueCECache[queueName]['Hash'] == queueHash:
            queueCE = self.queueCECache[queueName]['CE']
          else:
            result = ceFactory.getCE( ceName = ce,
                                      ceType = ceDict['CEType'],
                                      ceParametersDict = ceQueueDict )
            if not result['OK']:
              return result
            self.queueCECache.setdefault( queueName, {} )
            self.queueCECache[queueName]['Hash'] = queueHash
            self.queueCECache[queueName]['CE'] = result['Value']
            queueCE = self.queueCECache[queueName]['CE']

          queueInfo['CE'] = queueCE
          queueInfo['CEName'] = ce
          queueInfo['CEType'] = ceDict['CEType']
          queueInfo['Site'] = site
          queueInfo['QueueName'] = queue
          queueInfo['Platform'] = platform
          result = queueCE.isValid()
          if not result['OK']:
            self.log.fatal( result['Message'] )
            return result

          # The queue level option, when present, hides the CE level one
          if 'BundleProxy' in queueParams:
            if queueParams['BundleProxy'].lower() in ['true','yes','1']:
              queueInfo['BundleProxy'] = True
          elif 'BundleProxy' in ceDict:
            if ceDict['BundleProxy'].lower() in ['true','yes','1']:
              queueInfo['BundleProxy'] = True

          if site not in self.sites:
            self.sites.append( site )

    return S_OK()
  def __getGridJobOutput(self,pilotReference):
    """ Retrieve the standard output and standard error of a pilot job.

        The output stored in pilotDB is returned when it is not empty.
        Otherwise the output is fetched with the proxy of the pilot owner
        (LCG, gLite and CREAM grid types) or through a CE object built for
        the pilot queue (any other grid type) and, when a standard output was
        obtained, it is stored in pilotDB.

        :param pilotReference: reference of the pilot job
        :return: S_OK( dict ) with the StdOut, StdErr, OwnerDN, OwnerGroup and
                 FileList keys, or a failed result
    """

    result = pilotDB.getPilotInfo(pilotReference)
    if not ( result['OK'] and result[ 'Value' ] ):
      return S_ERROR('Failed to get info for pilot ' + pilotReference)

    pilotDict = result['Value'][pilotReference]
    owner = pilotDict['OwnerDN']
    group = pilotDict['OwnerGroup']

    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    stored = pilotDB.getPilotOutput(pilotReference)
    if stored['OK']:
      stdout = stored['Value']['StdOut']
      error = stored['Value']['StdErr']
      if stdout or error:
        return S_OK( { 'StdOut': stdout,
                       'StdErr': error,
                       'OwnerDN': owner,
                       'OwnerGroup': group,
                       'FileList': [] } )
      gLogger.warn( 'Empty pilot output found for %s' % pilotReference )

    gridType = pilotDict['GridType']
    if gridType not in ["LCG","gLite","CREAM"]:
      # Instantiate the appropriate CE and ask it for the output
      ceFactory = ComputingElementFactory()
      result = getQueue( pilotDict['GridSite'], pilotDict['DestinationSite'], pilotDict['Queue'] )
      if not result['OK']:
        return result
      queueDict = result['Value']
      result = ceFactory.getCE( gridType, pilotDict['DestinationSite'], queueDict )
      if not result['OK']:
        return result
      ce = result['Value']

      # The pilot stamp, when there is one, is appended to the reference
      pilotStamp = pilotDict['PilotStamp']
      pRef = pilotReference + ':::' + pilotStamp if pilotStamp else pilotReference
      result = ce.getJobOutput( pRef )
      if not result['OK']:
        return result
      stdout,error = result['Value']
      if stdout:
        storeResult = pilotDB.storePilotOutput(pilotReference,stdout,error)
        if not storeResult['OK']:
          gLogger.error('Failed to store pilot output:',storeResult['Message'])

      return S_OK( { 'StdOut': stdout,
                     'StdErr': error,
                     'OwnerDN': owner,
                     'OwnerGroup': group,
                     'FileList': [] } )

    # Grid pilots: the output is retrieved with the proxy of the pilot owner.
    # From here on group holds the value returned by getGroupOption, which is
    # also what is reported as OwnerGroup.
    group = getGroupOption(group,'VOMSRole',group)
    ret = gProxyManager.getPilotProxyFromVOMSGroup( owner, group )
    if not ret['OK']:
      gLogger.error( ret['Message'] )
      gLogger.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( owner, group ) )
      return S_ERROR("Failed to get the pilot's owner proxy")
    proxy = ret['Value']

    pilotStamp = pilotDict['PilotStamp']
    result = getPilotOutput( proxy, gridType, pilotReference, pilotStamp )
    if not result['OK']:
      return S_ERROR('Failed to get pilot output: '+result['Message'])
    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    # The output entries are read from the top level of the result here
    stdout = result['StdOut']
    error = result['StdErr']
    fileList = result['FileList']
    if stdout:
      storeResult = pilotDB.storePilotOutput(pilotReference,stdout,error)
      if not storeResult['OK']:
        gLogger.error('Failed to store pilot output:',storeResult['Message'])

    return S_OK( { 'StdOut': stdout,
                   'StdErr': error,
                   'OwnerDN': owner,
                   'OwnerGroup': group,
                   'FileList': fileList } )
示例#13
0
  def getQueues( self ):
    """ Fill self.queueDict with the queues of the CEs of self.siteNames

        Only the CEs whose 'SubmissionMode' option is "direct" and whose type
        is listed in the 'CETypes' agent option are considered; a non-empty
        'CEs' agent option restricts the selection further. One entry is
        created per '<ce>_<queue>'. It holds the queue parameters
        ('ParametersDict'), the CE object ('CE'), 'CEName', 'CEType', 'Site',
        'QueueName' and, when the queue defines that option, 'BundleProxy'.

        :return: S_OK(), S_ERROR when a CS lookup fails or returns nothing,
                 or the failed result of the CE creation or validation
    """

    ceFactory = ComputingElementFactory()
    ceTypes = self.am_getOption( 'CETypes', [] )
    ceConfList = self.am_getOption( 'CEs', [] )

    for siteName in self.siteNames:
      # Look up CE definitions in the site CS description
      gridType = siteName.split( '.' )[0]
      cesSection = '/Resources/Sites/%s/%s/CEs' % ( gridType, siteName )
      result = gConfig.getSections( cesSection )
      if not result['OK']:
        return S_ERROR( 'Failed to look up the CS for the site %s CEs' % siteName )
      if not result['Value']:
        return S_ERROR( 'No CEs found for site %s' % siteName )

      ceList = []
      for ce in result['Value']:
        # An empty 'CEs' option means that no CE is filtered out by name
        if ceConfList and ce not in ceConfList:
          continue
        ceSection = '%s/%s' % ( cesSection, ce )
        ceType = gConfig.getValue( '%s/CEType' % ceSection, 'Unknown' )
        ceOptions = gConfig.getOptionsDict( ceSection )
        if not ceOptions['OK']:
          return S_ERROR( 'Failed to look up the CS for ce %s' % ce )
        ceDict = ceOptions['Value']
        isDirect = "SubmissionMode" in ceDict and ceDict['SubmissionMode'].lower() == "direct"
        if isDirect and ceType in ceTypes:
          ceList.append( ( ce, ceType, ceDict ) )

      for ce, ceType, ceDict in ceList:
        section = '%s/%s/Queues' % ( cesSection, ce )
        result = gConfig.getSections( section )
        if not result['OK']:
          return S_ERROR( 'Failed to look up the CS for queues' )
        if not result['Value']:
          return S_ERROR( 'No Queues found for site %s, ce %s' % ( siteName, ce ) )

        for queue in result['Value']:
          queueOptions = gConfig.getOptionsDict( '%s/%s' % ( section, queue ) )
          if not queueOptions['OK']:
            return S_ERROR( 'Failed to look up the CS for ce,queue %s,%s' % ( ce, queue ) )

          queueName = '%s_%s' % ( ce, queue )
          params = queueOptions['Value']
          queueInfo = { 'ParametersDict': params }
          self.queueDict[queueName] = queueInfo

          params['Queue'] = queue
          params['Site'] = siteName
          params['GridEnv'] = self.gridEnv
          params['Setup'] = gConfig.getValue( '/DIRAC/Setup', 'unknown' )

          # Evaluate the CPU limit of the queue according to the Glue convention
          # To Do: should be a utility
          if "maxCPUTime" in params and "SI00" in params:
            # Clamp the value: some sites publish crazy numbers in the CS
            maxCPUTime = min( max( float( params['maxCPUTime'] ), 0 ), 86400 * 12.5 )
            si00 = float( params['SI00'] )
            params['CPUTime'] = int( 60. / 250. * maxCPUTime * si00 )

          # Each queue works in its own sub-directory of the working directory
          qwDir = os.path.join( self.workingDirectory, queue )
          if not os.path.exists( qwDir ):
            os.mkdir( qwDir )
          params['WorkingDirectory'] = qwDir

          # CE level options, overridden by the queue level ones
          queueDict = dict( ceDict )
          queueDict.update( params )
          result = ceFactory.getCE( ceName = ce,
                                   ceType = ceType,
                                   ceParametersDict = queueDict )
          if not result['OK']:
            return result
          queueCE = result['Value']
          queueInfo['CE'] = queueCE
          queueInfo['CEName'] = ce
          queueInfo['CEType'] = ceType
          queueInfo['Site'] = siteName
          queueInfo['QueueName'] = queue

          result = queueCE.isValid()
          if not result['OK']:
            self.log.fatal( result['Message'] )
            return result

          # The presence of the option is enough, its value is not inspected
          if 'BundleProxy' in params:
            queueInfo['BundleProxy'] = True

    return S_OK()
示例#14
0
  def getQueues( self, resourceDict ):
    """ Rebuild self.queueDict with one entry per ( CE, queue ) pair of resourceDict.

        resourceDict is read as { site: { ce: ceDict } }, every ceDict carrying a
        'Queues' mapping of queue name to queue parameters. The 'Queues' key is
        popped from each ceDict and the queue parameter dictionaries are completed
        in place, so the structure passed by the caller is modified.

        For each queue the CE level and queue level settings ( Tag, MaxRAM,
        PilotRunDirectory, Platform ) are combined, mkDir() is called on the queue
        working directory, and a CE object is requested from the factory unless
        self.queueCECache already holds one built from an identical description.
        self.platforms and self.sites are extended with values not yet present.

        Returns S_OK(), or the failed result of the CE creation or validation.
    """

    self.queueDict = {}
    ceFactory = ComputingElementFactory()

    for site in resourceDict:
      siteCEs = resourceDict[site]
      for ce in siteCEs:
        ceDict = siteCEs[ce]
        ceTags = ceDict.get( 'Tag', [] )
        pilotRunDirectory = ceDict.get( 'PilotRunDirectory', '' )
        if isinstance( ceTags, basestring ):
          ceTags = fromChar( ceTags )
        ceMaxRAM = ceDict.get( 'MaxRAM', None )
        qDict = ceDict.pop( 'Queues' )

        for queue in qDict:
          queueName = '%s_%s' % ( ce, queue )
          queueEntry = {}
          self.queueDict[queueName] = queueEntry
          # The very same dictionary object as in qDict: it is completed in place
          queueParams = qDict[queue]
          queueEntry['ParametersDict'] = queueParams

          queueParams['Queue'] = queue
          queueParams['Site'] = site
          queueParams['GridEnv'] = self.gridEnv
          queueParams['Setup'] = gConfig.getValue( '/DIRAC/Setup', 'unknown' )

          # CPU limit of the queue following the Glue convention
          # To Do: should be a utility
          if "maxCPUTime" in queueParams and "SI00" in queueParams:
            # Clamp to [0, 12.5 days]: some sites publish crazy values in the CS
            maxCPUTime = min( max( float( queueParams['maxCPUTime'] ), 0 ), 86400 * 12.5 )
            si00 = float( queueParams['SI00'] )
            queueParams['CPUTime'] = int( 60. / 250. * maxCPUTime * si00 )

          # Tags: union of the CE and queue tags when both are defined
          queueTags = queueParams.get( 'Tag' )
          if queueTags and isinstance( queueTags, basestring ):
            queueTags = fromChar( queueTags )
            queueParams['Tag'] = queueTags
          if ceTags:
            queueParams['Tag'] = list( set( ceTags + queueTags ) ) if queueTags else ceTags

          # MaxRAM: the queue value wins, the CE value is the fallback
          maxRAM = queueParams.get( 'MaxRAM' ) or ceMaxRAM
          if maxRAM:
            queueParams['MaxRAM'] = maxRAM

          if pilotRunDirectory:
            queueParams['JobExecDir'] = pilotRunDirectory

          qwDir = os.path.join( self.workingDirectory, queue )
          mkDir( qwDir )
          queueParams['WorkingDirectory'] = qwDir

          # Platform: queue setting first, then CE setting, then <architecture>_<OS>
          platform = ''
          if "Platform" in queueParams:
            platform = queueParams['Platform']
          elif "Platform" in ceDict:
            platform = ceDict['Platform']
          elif "OS" in ceDict:
            platform = '_'.join( [ ceDict.get( 'architecture', 'x86_64' ), ceDict['OS'] ] )
          if platform and platform not in self.platforms:
            self.platforms.append( platform )

          if platform and "Platform" not in queueParams:
            result = Resources.getDIRACPlatform( platform )
            if result['OK']:
              queueParams['Platform'] = result['Value'][0]

          # Full description handed to the CE: CE options overridden by queue options
          ceQueueDict = dict( ceDict )
          ceQueueDict.update( queueParams )

          # Create the CE object only if there is no cached one for an unchanged
          # queue definition
          queueHash = self.__generateQueueHash( ceQueueDict )
          cacheHit = queueName in self.queueCECache and \
                     self.queueCECache[queueName]['Hash'] == queueHash
          if not cacheHit:
            result = ceFactory.getCE( ceName = ce,
                                      ceType = ceDict['CEType'],
                                      ceParametersDict = ceQueueDict )
            if not result['OK']:
              return result
            self.queueCECache.setdefault( queueName, {} )
            self.queueCECache[queueName]['Hash'] = queueHash
            self.queueCECache[queueName]['CE'] = result['Value']
          queueCE = self.queueCECache[queueName]['CE']

          queueEntry['CE'] = queueCE
          queueEntry['CEName'] = ce
          queueEntry['CEType'] = ceDict['CEType']
          queueEntry['Site'] = site
          queueEntry['QueueName'] = queue
          queueEntry['Platform'] = platform

          result = queueCE.isValid()
          if not result['OK']:
            self.log.fatal( result['Message'] )
            return result

          # BundleProxy: a queue level setting hides the CE level one
          bundleSource = queueParams if 'BundleProxy' in queueParams else ceDict
          if 'BundleProxy' in bundleSource and \
             bundleSource['BundleProxy'].lower() in ( 'true', 'yes', '1' ):
            queueEntry['BundleProxy'] = True

          if site not in self.sites:
            self.sites.append( site )

    return S_OK()
示例#15
0
    def getQueues(self, resourceDict):
        """Get the list of relevant CEs and their descriptions.

        Rebuilds ``self.queueDict`` with one entry per (CE, queue) pair found in
        ``resourceDict``, which is read as ``{site: {ce: ceDict}}`` where every
        ``ceDict`` holds a ``"Queues"`` mapping of queue name to queue parameters.

        Note: ``"Queues"`` is popped from each ``ceDict`` and the queue parameter
        dictionaries are completed in place, so the caller's structure is modified.

        For each queue a working directory is created under
        ``self.workingDirectory``, the platform is resolved (queue setting, then
        CE setting, then ``<architecture>_<OS>``), and a CE object is obtained
        from the ``ComputingElementFactory`` and validated. ``self.platforms``
        and ``self.sites`` are extended with values not yet present.

        :param dict resourceDict: site -> CE -> CE description
        :return: S_OK(), or the failed result of the CE creation or validation
        """

        self.queueDict = {}
        ceFactory = ComputingElementFactory()

        for site in resourceDict:
            for ce in resourceDict[site]:
                ceDict = resourceDict[site][ce]
                qDict = ceDict.pop("Queues")
                for queue in qDict:
                    queueName = "%s_%s" % (ce, queue)
                    queueEntry = {}
                    self.queueDict[queueName] = queueEntry
                    # Same dictionary object as in qDict: it is completed in place
                    queueParams = qDict[queue]
                    queueEntry["ParametersDict"] = queueParams

                    queueParams["Queue"] = queue
                    queueParams["Site"] = site
                    queueParams["GridEnv"] = self.gridEnv
                    queueParams["Setup"] = gConfig.getValue("/DIRAC/Setup", "unknown")

                    # Evaluate the CPU limit of the queue according to the Glue convention
                    # To Do: should be a utility
                    if "maxCPUTime" in queueParams and "SI00" in queueParams:
                        maxCPUTime = float(queueParams["maxCPUTime"])
                        # For some sites there are crazy values in the CS:
                        # clamp to [0, 12.5 days]
                        maxCPUTime = max(maxCPUTime, 0)
                        maxCPUTime = min(maxCPUTime, 86400 * 12.5)
                        si00 = float(queueParams["SI00"])
                        queueCPUTime = 60.0 / 250.0 * maxCPUTime * si00
                        queueParams["CPUTime"] = int(queueCPUTime)

                    qwDir = os.path.join(self.workingDirectory, queue)
                    if not os.path.exists(qwDir):
                        os.makedirs(qwDir)
                    queueParams["WorkingDirectory"] = qwDir

                    # Platform: queue setting first, then CE setting, then <architecture>_<OS>
                    platform = ""
                    if "Platform" in queueParams:
                        platform = queueParams["Platform"]
                    elif "Platform" in ceDict:
                        platform = ceDict["Platform"]
                    elif "OS" in ceDict:
                        architecture = ceDict.get("architecture", "x86_64")
                        OS = ceDict["OS"]
                        platform = "_".join([architecture, OS])
                    if platform and platform not in self.platforms:
                        self.platforms.append(platform)

                    if platform and "Platform" not in queueParams:
                        result = Resources.getDIRACPlatform(platform)
                        if result["OK"]:
                            queueParams["Platform"] = result["Value"]

                    # Description handed to the CE: CE options overridden by queue options
                    ceQueueDict = dict(ceDict)
                    ceQueueDict.update(queueParams)
                    result = ceFactory.getCE(ceName=ce, ceType=ceDict["CEType"], ceParametersDict=ceQueueDict)
                    if not result["OK"]:
                        return result
                    queueCE = result["Value"]

                    queueEntry["CE"] = queueCE
                    queueEntry["CEName"] = ce
                    queueEntry["CEType"] = ceDict["CEType"]
                    queueEntry["Site"] = site
                    queueEntry["QueueName"] = queue

                    result = queueCE.isValid()
                    if not result["OK"]:
                        self.log.fatal(result["Message"])
                        return result

                    # Enable proxy bundling only when the option is set to a true value;
                    # the mere presence of the key (e.g. BundleProxy = False) must not
                    # switch it on. A queue level setting hides the CE level one.
                    bundleSource = queueParams if "BundleProxy" in queueParams else ceDict
                    bundleProxy = str(bundleSource.get("BundleProxy", "")).strip().lower()
                    if bundleProxy in ("true", "yes", "1"):
                        queueEntry["BundleProxy"] = True

                    if site not in self.sites:
                        self.sites.append(site)

        return S_OK()