Exemplo n.º 1
0
 def findSE(self, se):
     """Return ``se`` unchanged, wrapped in an ``S_OK`` result.

     :param se: value handed back to the caller
     :return: result of ``S_OK(se)``
     """
     wrapped = S_OK(se)
     return wrapped
Exemplo n.º 2
0
 def setTransFlavour(self, flavour):
   """Remember ``flavour`` on the instance.

   :param flavour: value stored as ``self.flavour``
   :return: result of ``S_OK()``
   """
   self.flavour = flavour
   outcome = S_OK()
   return outcome
Exemplo n.º 3
0
 def setExtraname(self, extraname):
   """Remember ``extraname`` on the instance.

   :param extraname: value stored as ``self.extraname``
   :return: result of ``S_OK()``
   """
   self.extraname = extraname
   outcome = S_OK()
   return outcome
Exemplo n.º 4
0
 def setMetaValues(self, values):
   """Store the meta values as a list in ``self.metaValues``.

   :param values: either a list (stored as is) or a comma-separated string,
                  which is split on "," without stripping the items
   :return: result of ``S_OK()``
   """
   if not isinstance(values, list):
     # anything that is not a list is expected to offer split(), e.g. "a,b,c"
     values = values.split(",")
   self.metaValues = values
   return S_OK()
Exemplo n.º 5
0
 def setMetadata(self, metadata):
   """Parse ``key:value`` pairs and record them in ``self.extraData``.

   :param metadata: comma-separated string of ``key:value`` entries; entries that
                    do not split into exactly two fields on ":" are ignored
   :return: result of ``S_OK()``
   """
   for entry in metadata.split(','):
     fields = entry.strip().split(':')
     if len(fields) != 2:
       continue
     key, value = fields
     # only the value is stripped again; the key keeps any space before the ':'
     self.extraData[key] = value.strip()
   return S_OK()
Exemplo n.º 6
0
 def release( self, lockName ):
   """Release the lock registered under ``lockName``.

   :param lockName: key of the lock in ``self.__locks``
   :return: ``S_OK()`` once released, ``S_ERROR`` when the lock cannot be
            looked up / released
   """
   try:
     self.__locks[ lockName ].release()
   except ( KeyError, ValueError ):
     # An unknown name in a mapping lookup raises KeyError, which used to
     # escape; ValueError is still handled as before.
     return S_ERROR( "No lock named %s" % lockName )
   return S_OK()
Exemplo n.º 7
0
  def export_getJobPageSummaryWeb( self, selectDict, sortList, startItem, maxItems, selectJobs = True ):
    """ Get the summary of the job information for a given page in the
        job monitor in a generic format

        :param dict selectDict: job selection conditions. The 'FromDate' (or, for
                                backward compatibility, 'LastUpdate') and 'ToDate'
                                entries are removed from it and used as time limits
        :param list sortList: sorting instructions as ( attribute, order ) pairs;
                              only the first one is used
        :param int startItem: index of the first job of the requested page
        :param int maxItems: maximum number of jobs in the page
        :param bool selectJobs: when False only the counters are returned
        :return: S_OK( dict ) holding 'TotalRecords' and, if any job matches,
                 'Extras' ( number of jobs per status ); when jobs are selected
                 and visible also 'ParameterNames' and 'Records'.
                 S_ERROR if rights, selection or summary retrieval fail
    """
    resultDict = {}
    startDate = selectDict.get( 'FromDate', None )
    if startDate:
      del selectDict['FromDate']
    # For backward compatibility
    if startDate is None:
      startDate = selectDict.get( 'LastUpdate', None )
      if startDate:
        del selectDict['LastUpdate']
    endDate = selectDict.get( 'ToDate', None )
    if endDate:
      del selectDict['ToDate']

    result = self.jobPolicy.getControlledUsers( RIGHT_GET_INFO )
    if not result['OK']:
      return S_ERROR( 'Failed to evaluate user rights' )
    if result['Value'] != 'ALL':
      selectDict[ ( 'Owner','OwnerGroup' ) ] = result['Value']

    # Sorting instructions. Only one for the moment.
    if sortList:
      orderAttribute = sortList[0][0] + ":" + sortList[0][1]
    else:
      orderAttribute = None

    statusDict = {}
    result = gJobDB.getCounters( 'Jobs', ['Status'], selectDict,
                               newer = startDate,
                               older = endDate,
                               timeStamp = 'LastUpdateTime' )

    # A failure to get the counters is treated like "no jobs found"
    nJobs = 0
    if result['OK']:
      for stDict, count in result['Value']:
        nJobs += count
        statusDict[stDict['Status']] = count

    resultDict['TotalRecords'] = nJobs
    if nJobs == 0:
      return S_OK( resultDict )

    resultDict['Extras'] = statusDict

    if selectJobs:
      iniJob = startItem
      if iniJob >= nJobs:
        return S_ERROR( 'Item number out of range' )

      result = gJobDB.selectJobs( selectDict, orderAttribute = orderAttribute,
                                newer = startDate, older = endDate, limit = ( maxItems, iniJob ) )
      if not result['OK']:
        return S_ERROR( 'Failed to select jobs: ' + result['Message'] )

      summaryJobList = result['Value']
      if not self.globalJobsInfo:
        # Keep only the jobs the caller is allowed to see
        validJobs, invalidJobs, nonauthJobs, ownJobs = self.jobPolicy.evaluateJobRights( summaryJobList,
                                                                                         RIGHT_GET_INFO )
        summaryJobList = validJobs

      result = gJobDB.getAttributesForJobList( summaryJobList, SUMMARY )
      if not result['OK']:
        return S_ERROR( 'Failed to get job summary: ' + result['Message'] )

      summaryDict = result['Value']
      # Nothing left after the rights check: return the counters only instead of
      # failing below when picking the first job of an empty summary
      if not summaryDict:
        return S_OK( resultDict )

      # Evaluate last sign of life time
      for jobID, jobDict in summaryDict.items():
        if jobDict['HeartBeatTime'] == 'None':
          jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']
        else:
          lastTime = Time.fromString( jobDict['LastUpdateTime'] )
          hbTime = Time.fromString( jobDict['HeartBeatTime'] )
          # ( lastTime - lastTime ) is a zero time difference: the test reads
          # "the heart beat is more recent than the last update"
          if ( hbTime - lastTime ) > ( lastTime - lastTime ) or jobDict['Status'] == "Stalled":
            jobDict['LastSignOfLife'] = jobDict['HeartBeatTime']
          else:
            jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']

      # Task queue information is optional: on failure every job gets 0
      tqDict = {}
      result = gTaskQueueDB.getTaskQueueForJobs( summaryJobList )
      if result['OK']:
        tqDict = result['Value']

      # prepare the standard structure now
      # The attribute names of the first job define the column order;
      # list() keeps this working whether keys() returns a list or a view
      firstJobID = next( iter( summaryDict ) )
      paramNames = list( summaryDict[firstJobID] )

      records = []
      for jobID, jobDict in summaryDict.items():
        jParList = [ jobDict[pname] for pname in paramNames ]
        jParList.append( tqDict.get( jobID, 0 ) )
        records.append( jParList )

      resultDict['ParameterNames'] = paramNames + ['TaskQueueID']
      resultDict['Records'] = records

    return S_OK( resultDict )
Exemplo n.º 8
0
  def __resolveInputData( self ):
    """This method controls the execution of the DIRAC input data modules according
       to the VO policy defined in the configuration service.

       The policy (an ordered list of module paths) is taken from the job's
       'InputDataPolicy' argument if present, otherwise from the
       /Operations/InputDataPolicy options: first the entry for the site, then
       the 'Default' entry. Modules are run in order, each one receiving only the
       data the previous ones failed to resolve, until nothing is left.

       :return: S_OK structure extended with 'Successful' (dict) and 'Failed'
                (replicas left unresolved); the failing module result or S_ERROR
                otherwise
    """
    if 'SiteName' in self.arguments['Configuration']:
      site = self.arguments['Configuration']['SiteName']
    else:
      site = DIRAC.siteName()

    policy = []
    if 'Job' not in self.arguments:
      self.arguments['Job'] = {}

    if 'InputDataPolicy' in self.arguments['Job']:
      policy = self.arguments['Job']['InputDataPolicy']
      #In principle this can be a list of modules with the first taking precedence
      if isinstance( policy, types.StringTypes ):
        policy = [policy]
      self.log.info( 'Job has a specific policy setting: %s' % ( ', '.join( policy ) ) )
    else:
      self.log.verbose( 'Attempting to resolve input data policy for site %s' % site )
      inputDataPolicy = gConfig.getOptionsDict( '/Operations/InputDataPolicy' )
      if not inputDataPolicy['OK']:
        return S_ERROR( 'Could not resolve InputDataPolicy from /Operations/InputDataPolicy' )

      options = inputDataPolicy['Value']
      if site in options:
        policy = options[site]
        policy = [x.strip() for x in policy.split( ',' )]
        self.log.info( 'Found specific input data policy for site %s:\n%s' % ( site, '\n'.join( policy ) ) )
      elif 'Default' in options:
        policy = options['Default']
        policy = [x.strip() for x in policy.split( ',' )]
        self.log.info( 'Applying default input data policy for site %s:\n%s' % ( site, '\n'.join( policy ) ) )
      # with neither a site nor a Default entry the policy stays empty and no module is run

    dataToResolve = None #if none, all supplied input data is resolved
    successful = {}
    failedReplicas = []
    for modulePath in policy:
      result = self.__runModule( modulePath, dataToResolve )
      if not result['OK']:
        self.log.warn( 'Problem during %s execution' % modulePath )
        return result

      if 'Failed' in result:
        failedReplicas = result['Failed']

      if failedReplicas:
        self.log.info( '%s failed for the following files:\n%s'
                       % ( modulePath, '\n'.join( failedReplicas ) ) )
        # the next module only has to deal with what is still missing
        dataToResolve = failedReplicas

      successful.update( result['Successful'] )

      if not failedReplicas:
        self.log.info( 'All replicas resolved after %s execution' % ( modulePath ) )
        self.log.verbose( successful )
        # everything is resolved, the remaining modules are not needed
        break
      self.log.verbose( successful )

    result = S_OK()
    result['Successful'] = successful
    result['Failed'] = failedReplicas
    return result
Exemplo n.º 9
0
  def downloadSandbox( self, sbLocation, destinationDir = "", inMemory = False, unpack = True ):
    """
    Download a sandbox bundle from a storage element.

    :param str sbLocation: sandbox URL of the form "SB:<SEName>|<PFN>"
    :param str destinationDir: directory where the bundle is unpacked; the current
                               working directory if empty, created if needed
    :param bool inMemory: if True return the raw content of the bundle and remove
                          the downloaded copy
    :param bool unpack: if False (and not inMemory) keep the bundle in its
                        temporary directory and return its path; removing it is
                        then up to the caller
    :return: S_OK( content ) if inMemory, S_OK( bundle path ) if not unpack,
             S_OK( sum of the member sizes ) after unpacking; S_ERROR otherwise
    """
    if sbLocation.find( "SB:" ) != 0:
      return S_ERROR( "Invalid sandbox URL" )
    sbLocation = sbLocation[ 3: ]
    sbSplit = sbLocation.split( "|" )
    if len( sbSplit ) < 2:
      return S_ERROR( "Invalid sandbox URL" )
    SEName = sbSplit[0]
    # the PFN itself may contain "|"
    SEPFN = "|".join( sbSplit[1:] )
    # If destination dir is not specified use current working dir
    # If its defined ensure the dir structure is there
    if not destinationDir:
      destinationDir = os.getcwd()
    else:
      mkDir(destinationDir)

    try:
      tmpSBDir = tempfile.mkdtemp( prefix = "TMSB." )
    except Exception as e:
      return S_ERROR( "Cannot create temporal file: %s" % str( e ) )

    tarFileName = os.path.join( tmpSBDir, os.path.basename( SEPFN ) )

    def cleanUp():
      """ Best-effort removal of the downloaded bundle and of its temporary directory """
      try:
        if os.path.exists( tarFileName ):
          os.unlink( tarFileName )
        os.rmdir( tmpSBDir )
      except Exception as e:
        gLogger.warn( "Could not remove temporary dir %s: %s" % ( tmpSBDir, str( e ) ) )

    se = StorageElement( SEName, vo = self.__vo )
    result = returnSingleResult( se.getFile( SEPFN, localPath = tmpSBDir ) )

    if not result[ 'OK' ]:
      # do not leave the temporary directory behind when the download fails
      cleanUp()
      return result

    if inMemory:
      try:
        # the bundle is an archive: read it as binary and always close the file
        with open( tarFileName, 'rb' ) as tfile:
          data = tfile.read()
      except Exception as e:
        return S_ERROR( 'Failed to read the sandbox archive: %s' % str( e ) )
      finally:
        cleanUp()
      return S_OK( data )

    result = S_OK()
    if not unpack:
      result[ 'Value' ] = tarFileName
      return result

    try:
      sandboxSize = 0
      tf = tarfile.open( name = tarFileName, mode = "r" )
      try:
        # NOTE(review): members are extracted as they come; a bundle with absolute
        # or "../" member names can write outside destinationDir - confirm that
        # bundles are always trusted
        for tarinfo in tf:
          tf.extract( tarinfo, path = destinationDir )
          sandboxSize += tarinfo.size
      finally:
        tf.close()
      result[ 'Value' ] = sandboxSize
    except Exception as e:
      result = S_ERROR( "Could not open bundle: %s" % str( e ) )

    cleanUp()

    return result
Exemplo n.º 10
0
                                                                     'AlternativeBDIIs', 'VO']}),
 ('DIRAC.ConfigurationSystem.Agent.GOCDB2CSAgent', {'IgnoreOptions': ['Cycles', 'DryRun',
                                                                      'UpdatePerfSONARS']}),
 ('DIRAC.ConfigurationSystem.Agent.VOMS2CSAgent', {'IgnoreOptions': ['VO']}),
 ('DIRAC.DataManagementSystem.Agent.FTS3Agent', {}),
 ('DIRAC.FrameworkSystem.Agent.CAUpdateAgent', {}),
 ('DIRAC.FrameworkSystem.Agent.MyProxyRenewalAgent', {'IgnoreOptions': ['MinValidity', 'ValidityPeriod',
                                                                        'MinimumLifeTime',
                                                                        'RenewedLifeTime']}),
 ('DIRAC.FrameworkSystem.Agent.ErrorMessageMonitor', {}),
 ('DIRAC.FrameworkSystem.Agent.SystemLoggingDBCleaner', {'IgnoreOptions': ['RemoveDate']}),
 ('DIRAC.FrameworkSystem.Agent.TopErrorMessagesReporter', {}),
 ('DIRAC.RequestManagementSystem.Agent.CleanReqDBAgent', {}),
 ('DIRAC.RequestManagementSystem.Agent.RequestExecutingAgent', {'IgnoreOptions': ['FTSMode',
                                                                                  'OperationHandlers'],
                                                                'SpecialMocks': {'gConfig': S_OK([])}}),
 ('DIRAC.ResourceStatusSystem.Agent.CacheFeederAgent', {}),
 ('DIRAC.ResourceStatusSystem.Agent.ElementInspectorAgent', {}),
 ('DIRAC.ResourceStatusSystem.Agent.EmailAgent', {}),
 ('DIRAC.ResourceStatusSystem.Agent.SiteInspectorAgent', {}),
 ('DIRAC.ResourceStatusSystem.Agent.SummarizeLogsAgent', {}),
 ('DIRAC.ResourceStatusSystem.Agent.TokenAgent', {}),
 ('DIRAC.StorageManagementSystem.Agent.RequestFinalizationAgent', {}),
 ('DIRAC.StorageManagementSystem.Agent.RequestPreparationAgent', {}),
 ('DIRAC.StorageManagementSystem.Agent.StageMonitorAgent', {}),
 ('DIRAC.StorageManagementSystem.Agent.StageRequestAgent', {'IgnoreOptions': ['PinLifetime']}),
 ('DIRAC.TransformationSystem.Agent.DataRecoveryAgent', {}),
 ('DIRAC.TransformationSystem.Agent.InputDataAgent', {'IgnoreOptions': ['DateKey', 'TransformationTypes']}),
 ('DIRAC.TransformationSystem.Agent.MCExtensionAgent', {'IgnoreOptions': ['TransformationTypes',
                                                                          'TasksPerIteration',
                                                                          'MaxFailureRate',
Exemplo n.º 11
0
def getClockDeviation(serverList=None):
  """Return the absolute offset between the local UTC clock and the NTP time.

  :param serverList: passed through to getNTPUTCTime
  :return: S_OK( offset in whole seconds, microseconds ignored ) or the failed
           result of getNTPUTCTime
  """
  ntpResult = getNTPUTCTime(serverList)
  if ntpResult['OK']:
    offset = datetime.datetime.utcnow() - ntpResult['Value']
    return S_OK(abs(offset.seconds + offset.days * 86400))
  return ntpResult
Exemplo n.º 12
0
    def removeJobsByStatus(self, condDict, delay=False):
        """ Remove the jobs matching condDict from the job, task queue and logging DBs

        At most self.maxJobsAtOnce jobs are treated per call. Sandboxes are
        unassigned first and the removal of oversized sandboxes is scheduled;
        jobs for which that scheduling failed are left untouched.

        :param dict condDict: job selection conditions
        :param delay: if set, only jobs older than this value are selected
                      (logged as a number of days)
        :return: S_OK() also when individual removals fail (those are logged and
                 counted); the failed result if selection or sandbox handling fails
        """
        if delay:
            gLogger.verbose("Removing jobs with %s and older than %s day(s)" %
                            (condDict, delay))
            result = self.jobDB.selectJobs(condDict,
                                           older=delay,
                                           limit=self.maxJobsAtOnce)
        else:
            gLogger.verbose("Removing jobs with %s " % condDict)
            result = self.jobDB.selectJobs(condDict, limit=self.maxJobsAtOnce)

        if not result['OK']:
            return result

        jobList = result['Value']
        if len(jobList) > self.maxJobsAtOnce:
            jobList = jobList[:self.maxJobsAtOnce]
        if not jobList:
            return S_OK()

        self.log.notice("Deleting %s jobs for %s" % (len(jobList), condDict))

        count = 0
        error_count = 0
        result = SandboxStoreClient(useCertificates=True).unassignJobs(jobList)
        if not result['OK']:
            gLogger.error("Cannot unassign jobs to sandboxes",
                          result['Message'])
            return result

        result = self.deleteJobOversizedSandbox(jobList)
        if not result['OK']:
            gLogger.error("Cannot schedule removal of oversized sandboxes",
                          result['Message'])
            return result

        # Jobs whose oversized sandbox removal could not be scheduled are kept
        failedJobs = set(result['Value']['Failed'])
        if failedJobs:
            jobList = [job for job in jobList if job not in failedJobs]
        if not jobList:
            return S_OK()

        # TODO: we should not remove a job if it still has requests in the RequestManager.
        # But this logic should go in the client or in the service, and right now no service expose jobDB.removeJobFromDB

        if self.jobByJob:
            for jobID in jobList:
                # Each tuple: ( DB name for the log, result of the removal ).
                # The message logged must come from the result that failed.
                removals = (('JobDB', self.jobDB.removeJobFromDB(jobID)),
                            ('TaskQueueDB', self.taskQueueDB.deleteJob(jobID)),
                            ('JobLoggingDB', self.jobLoggingDB.deleteJob(jobID)))
                errorFlag = False
                for dbName, dbResult in removals:
                    if not dbResult['OK']:
                        gLogger.warn(
                            'Failed to remove job %d from %s' % (jobID, dbName),
                            dbResult['Message'])
                        errorFlag = True
                if errorFlag:
                    error_count += 1
                else:
                    count += 1
                if self.throttlingPeriod:
                    time.sleep(self.throttlingPeriod)
        else:
            result = self.jobDB.removeJobFromDB(jobList)
            if not result['OK']:
                gLogger.error('Failed to delete %d jobs from JobDB' %
                              len(jobList))
            else:
                gLogger.info('Deleted %d jobs from JobDB' % len(jobList))

            # In bulk mode the counters reflect the task queue removals only
            for jobID in jobList:
                resultTQ = self.taskQueueDB.deleteJob(jobID)
                if not resultTQ['OK']:
                    gLogger.warn(
                        'Failed to remove job %d from TaskQueueDB' % jobID,
                        resultTQ['Message'])
                    error_count += 1
                else:
                    count += 1

            result = self.jobLoggingDB.deleteJob(jobList)
            if not result['OK']:
                gLogger.error('Failed to delete %d jobs from JobLoggingDB' %
                              len(jobList))
            else:
                gLogger.info('Deleted %d jobs from JobLoggingDB' %
                             len(jobList))

        if count > 0 or error_count > 0:
            gLogger.info('Deleted %d jobs from JobDB, %d errors' %
                         (count, error_count))
        return S_OK()
Exemplo n.º 13
0
    def submitJob(self, executableFile, proxy=None, numberOfJobs=1):
        """ Method to submit job
    """
        self.createClient()
        # Check if the client is ready
        if not self.BOINCClient:
            return S_ERROR('Soap client is not ready')

        self.log.verbose("Executable file path: %s" % executableFile)

        # if no proxy is supplied, the executable can be submitted directly
        # otherwise a wrapper script is needed to get the proxy to the execution node
        # The wrapper script makes debugging more complicated and thus it is
        # recommended to transfer a proxy inside the executable if possible.
        wrapperContent = ''
        if proxy:
            self.log.verbose('Setting up proxy for payload')

            compressedAndEncodedProxy = base64.encodestring(
                bz2.compress(proxy.dumpAllToString()['Value'])).replace(
                    '\n', '')
            compressedAndEncodedExecutable = base64.encodestring(
                bz2.compress(open(executableFile, "rb").read(),
                             9)).replace('\n', '')

            wrapperContent = """#!/bin/bash
/usr/bin/env python << EOF
# Wrapper script for executable and proxy
import os
import tempfile
import sys
import base64
import bz2
import shutil
import stat
try:
  workingDirectory = tempfile.mkdtemp( suffix = '_wrapper', prefix= 'TORQUE_' )
  os.chdir( workingDirectory )
  open( 'proxy', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) )
  open( '%(executable)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedExecutable)s" ) ) )
  os.chmod('proxy',stat.S_IRUSR | stat.S_IWUSR)
  os.chmod('%(executable)s',stat.S_IRWXU)
  os.environ["X509_USER_PROXY"]=os.path.join(workingDirectory, 'proxy')
except Exception as x:
  print >> sys.stderr, x
  sys.exit(-1)
cmd = "./%(executable)s"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )

shutil.rmtree( workingDirectory )

EOF
""" % { 'compressedAndEncodedProxy': compressedAndEncodedProxy, \
              'compressedAndEncodedExecutable': compressedAndEncodedExecutable, \
              'executable': os.path.basename( executableFile ) }

            fd, name = tempfile.mkstemp(suffix='_pilotwrapper.py',
                                        prefix='DIRAC_',
                                        dir=os.getcwd())
            os.close(fd)
            submitFile = name

        else:  # no proxy
            submitFile = executableFile
            wrapperContent = self._fromFileToStr(submitFile)

        if not wrapperContent:
            self.log.error('Executable file is empty.')
            return S_ERROR('Executable file is empty.')

        #Some special symbol can not be transported by xml,
        #such as less, greater, amp. So, base64 is used here.
        wrapperContent = base64.encodestring(wrapperContent).replace("\n", '')

        prefix = os.path.splitext(os.path.basename(submitFile))[0].replace(
            '_pilotwrapper', '').replace('DIRAC_', '')
        batchIDList = []
        stampDict = {}
        for i in range(0, numberOfJobs):
            jobID = "%s_%d@%s" % (prefix, i, self.suffix)
            try:
                #  print jobID + "\n" + wrapperContent
                #  print self.BOINCClient
                result = self.BOINCClient.service.submitJob(
                    jobID, wrapperContent, self.ceParameters['Platform'][0],
                    self.ceParameters['MarketPlaceID'])
            except:
                self.log.error('Could not submit the pilot to the BOINC CE',
                               'Pilot %s, BOINC CE %s' % (jobID, self.wsdl))
                break

            if not result['ok']:
                self.log.warn(
                    'Didn\'t submit the pilot %s to the BOINC CE %s, the value returned is false!'
                    % (jobID, self.wsdl))
                break

            self.log.verbose('Submit the pilot %s to the BOINC CE %s' %
                             (jobID, self.wsdl))
            diracStamp = "%s_%d" % (prefix, i)
            batchIDList.append(jobID)
            stampDict[jobID] = diracStamp

        if batchIDList:
            resultRe = S_OK(batchIDList)
            resultRe['PilotStampDict'] = stampDict
        else:
            resultRe = S_ERROR('Submit no pilot to BOINC CE %s' % self.wsdl)
        return resultRe
Exemplo n.º 14
0
 def addSE(self, se):
     """Return ``se`` unchanged, wrapped in an ``S_OK`` result.

     :param se: value handed back to the caller
     :return: result of ``S_OK(se)``
     """
     wrapped = S_OK(se)
     return wrapped
Exemplo n.º 15
0
def setVerbose( optVal ):
  """Set the module-level ``verbose`` flag to True.

  :param optVal: accepted but not used
  :return: result of ``S_OK()``
  """
  global verbose
  verbose = True
  outcome = S_OK()
  return outcome
Exemplo n.º 16
0
    def getTimeLeft(self, cpuConsumed=0.0, processors=1):
        """ Returns the CPU Time Left for supported batch systems.
        The CPUConsumed is the current raw total CPU.

        :param float cpuConsumed: CPU consumed so far; above one hour it is used
                                  to renormalise the batch system figures
        :param int processors: factor used to derive a missing CPU figure from
                               the wall clock one and vice versa
        :return: S_OK( remaining CPU work in normalized units ), or S_ERROR / the
                 failed batch plugin result
        """
        # Quit if no scale factor available
        if not self.scaleFactor:
            return S_ERROR(
                '/LocalSite/CPUScalingFactor not defined for site %s' %
                DIRAC.siteName())

        if not self.batchPlugin:
            return S_ERROR(self.batchError)

        resourceDict = self.batchPlugin.getResourceUsage()
        if not resourceDict['OK']:
            self.log.warn(
                'Could not determine timeleft for batch system at site %s' %
                DIRAC.siteName())
            return resourceDict

        resources = resourceDict['Value']
        self.log.debug("self.batchPlugin.getResourceUsage(): %s" %
                       str(resources))
        if not resources.get('CPULimit') and not resources.get(
                'WallClockLimit'):
            # This should never happen
            return S_ERROR('No CPU or WallClock limit obtained')

        # if one of CPULimit or WallClockLimit is missing, compute a reasonable value
        if not resources.get('CPULimit'):
            resources['CPULimit'] = resources['WallClockLimit'] * processors
        elif not resources.get('WallClockLimit'):
            resources['WallClockLimit'] = resources['CPULimit'] / processors

        # if one of CPU or WallClock is missing, compute a reasonable value
        if not resources.get('CPU'):
            resources['CPU'] = resources['WallClock'] * processors
        elif not resources.get('WallClock'):
            resources['WallClock'] = resources['CPU'] / processors

        cpu = float(resources['CPU'])
        cpuLimit = float(resources['CPULimit'])
        wallClock = float(resources['WallClock'])
        wallClockLimit = float(resources['WallClockLimit'])
        batchSystemTimeUnit = resources.get('Unit', 'Both')

        # Some batch systems rely on wall clock time and/or cpu time to make allocations
        # (local names chosen so that they do not shadow the "time" module)
        if batchSystemTimeUnit == 'WallClock':
            usedTime = wallClock
            timeLimit = wallClockLimit
        else:
            usedTime = cpu
            timeLimit = cpuLimit

        if usedTime and cpuConsumed > 3600. and self.normFactor:
            # If there has been more than 1 hour of consumed CPU and
            # there is a Normalization set for the current CPU
            # use that value to renormalize the values returned by the batch system
            # NOTE: cpuConsumed is non-zero for call by the JobAgent and 0 for call by the watchdog
            # cpuLimit and cpu may be in the units of the batch system, not real seconds...
            # (in this case the other case won't work)
            # therefore renormalise it using cpuConsumed (which is in real seconds)
            cpuWorkLeft = (timeLimit -
                           usedTime) * self.normFactor * cpuConsumed / usedTime
        elif self.normFactor:
            # FIXME: this is always used by the watchdog... Also used by the JobAgent
            #        if consumed less than 1 hour of CPU
            # It was using self.scaleFactor but this is inconsistent: use the same as above
            # In case the returned cpu and cpuLimit are not in real seconds, this is however rubbish
            cpuWorkLeft = (timeLimit - usedTime) * self.normFactor
        else:
            # Last resort recovery...
            cpuWorkLeft = (timeLimit - usedTime) * self.scaleFactor

        # log the value actually returned (a never-updated 0 used to be printed)
        self.log.verbose('Remaining CPU in normalized units is: %.02f' %
                         cpuWorkLeft)
        return S_OK(cpuWorkLeft)
Exemplo n.º 17
0
def setTaskQueueID( optVal ):
  """Store ``optVal``, converted with ``long``, in the module-level ``taskQueueID``.

  :param optVal: value convertible by ``long``
  :return: result of ``S_OK()``
  """
  global taskQueueID
  converted = long( optVal )
  taskQueueID = converted
  return S_OK()
Exemplo n.º 18
0
                instance.update()
                while instance.state != u'running':
                    if instance.state == u'terminated':
                        self.log.error(
                            "New instance terminated while starting",
                            "AMI: %s" % self.__vmAMI)
                        continue
                    self.log.info(
                        "Sleeping for 10 secs for instance %s (current state %s)"
                        % (instance, instance.state))
                    time.sleep(10)
                    instance.update()
                if instance.state != u'terminated':
                    self.log.info("Instance %s started" % instance.id)
            idList.append(instance.id)
        return S_OK(idList)

    def __startSpotInstances(self, numImages, instanceType,
                             waitForConfirmation):
        self.log.info("Starting %d new spot instances for AMI %s (type %s)" %
                      (numImages, self.__vmAMI, instanceType))
        try:
            spotInstanceRequests = self.__conn.request_spot_instances(
                price="%f" % self.__vmMaxAllowedPrice,
                image_id=self.__vmAMI,
                count=numImages,
                instance_type=instanceType)
            self.log.verbose("Got %d spot instance requests" %
                             len(spotInstanceRequests))
        except Exception, e:
            return S_ERROR("Could not start spot instances: %s" % str(e))
Exemplo n.º 19
0
 def export_getJobsStatus ( jobIDs ):
   """Return the 'Status' attribute of the given jobs.

   :param jobIDs: job identifiers passed to gJobDB.getAttributesForJobList
   :return: S_OK( {} ) when jobIDs is empty, otherwise the result of the lookup
   """
   if jobIDs:
     return gJobDB.getAttributesForJobList( jobIDs, ['Status'] )
   return S_OK( {} )
Exemplo n.º 20
0
  def export_updateSoftware(self, version, rootPath="", gridVersion=""):
    """ Update the local DIRAC software installation to version

        Builds and runs a dirac-install command line from the arguments and the
        /LocalInstallation configuration section.

        :param str version: release to install ( -r option )
        :param str rootPath: optional installation path ( -P option ); must exist
        :param str gridVersion: optional grid middleware version ( -g option )
        :return: S_OK() on success; S_ERROR with the output lines mentioning
                 "error", or a generic message, if the installation fails
    """

    # Check that we have a sane local configuration
    result = gConfig.getOptionsDict('/LocalInstallation')
    if not result['OK']:
      return S_ERROR('Invalid installation - missing /LocalInstallation section in the configuration')
    elif not result['Value']:
      return S_ERROR('Invalid installation - empty /LocalInstallation section in the configuration')

    if rootPath and not os.path.exists(rootPath):
      return S_ERROR('Path "%s" does not exists' % rootPath)

    cmdList = ['dirac-install', '-r', version, '-t', 'server']
    if rootPath:
      cmdList.extend(['-P', rootPath])

    # Check if there are extensions.
    # Work on a copy: the list is edited below and the helper's own data
    # must not be modified as a side effect
    extensionList = list(getCSExtensions() or [])
    # by default we do not install WebApp
    if "WebApp" in extensionList:
      extensionList.remove("WebApp")

    webPortal = gConfig.getValue('/LocalInstallation/WebApp', False)  # this is the new portal
    if webPortal and "WebAppDIRAC" not in extensionList:
      extensionList.append("WebAppDIRAC")

    # Only pass -e when there is something to install: an empty value is useless
    if extensionList:
      cmdList += ['-e', ','.join(extensionList)]

    project = gConfig.getValue('/LocalInstallation/Project')
    if project:
      cmdList += ['-l', project]

    # Are grid middleware bindings required ?
    if gridVersion:
      cmdList.extend(['-g', gridVersion])

    # The local configuration file is passed as last argument
    targetPath = gConfig.getValue('/LocalInstallation/TargetPath',
                                  gConfig.getValue('/LocalInstallation/RootPath', ''))
    if targetPath and os.path.exists(targetPath + '/etc/dirac.cfg'):
      cmdList.append(targetPath + '/etc/dirac.cfg')
    else:
      return S_ERROR('Local configuration not found')

    # 240 is the first argument of systemCall, presumably a timeout in seconds
    result = systemCall(240, cmdList)
    if not result['OK']:
      return result
    status = result['Value'][0]
    if status != 0:
      # Get error messages
      output = result['Value'][1].split('\n')
      error = [line.strip() for line in output if 'error' in line.lower()]
      if error:
        message = '\n'.join(error)
      else:
        message = "Failed to update software to %s" % version
      return S_ERROR(message)

    return S_OK()
Exemplo n.º 21
0
    def prepareNewReplicas(self):
        """ This is the first logical task to be executed and manages the New->Waiting transition of the Replicas
    """
        res = self.__getNewReplicas()
        if not res['OK']:
            gLogger.fatal(
                "RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.",
                res['Message'])
            return res
        if not res['Value']:
            gLogger.info("There were no New replicas found")
            return res
        replicas = res['Value']['Replicas']
        replicaIDs = res['Value']['ReplicaIDs']
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation."
            % len(replicaIDs))

        # Check that the files exist in the FileCatalog
        res = self.__getExistingFiles(replicas.keys())
        if not res['OK']:
            return res
        exist = res['Value']['Exist']
        terminal = res['Value']['Missing']
        failed = res['Value']['Failed']
        if not exist:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed determine existance of any files'
            )
            return S_OK()
        terminalReplicaIDs = {}
        for lfn, reason in terminal.items():
            for se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog."
            % len(exist))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog."
                % len(terminal))

        # Obtain the file sizes from the FileCatalog
        res = self.__getFileSize(exist)
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroSize']
        fileSizes = res['Value']['FileSizes']
        if not fileSizes:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed determine sizes of any files'
            )
            return S_OK()
        for lfn, reason in terminal.items():
            for se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog."
            % len(fileSizes))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog."
                % len(terminal))

        # Obtain the replicas from the FileCatalog
        res = self.__getFileReplicas(fileSizes.keys())
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroReplicas']
        fileReplicas = res['Value']['Replicas']
        if not fileReplicas:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed determine replicas for any files'
            )
            return S_OK()
        for lfn, reason in terminal.items():
            for se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained replica information for %s file from the FileCatalog."
            % len(fileReplicas))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog."
                % len(terminal))

        # Check the replicas exist at the requested site
        replicaMetadata = []
        for lfn, requestedSEs in replicas.items():
            lfnReplicas = fileReplicas[lfn]
            for requestedSE, replicaID in requestedSEs.items():
                if not requestedSE in lfnReplicas.keys():
                    terminalReplicaIDs[
                        replicaID] = "LFN not registered at requested SE"
                    replicas[lfn].pop(requestedSE)
                else:
                    replicaMetadata.append(
                        (replicaID, lfnReplicas[requestedSE], fileSizes[lfn]))

        # Update the states of the files in the database
        if terminalReplicaIDs:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replicas are terminally failed."
                % len(terminalReplicaIDs))
            #res = self.stagerClient.updateReplicaFailure( terminalReplicaIDs )
            res = self.storageDB.updateReplicaFailure(terminalReplicaIDs)
            if not res['OK']:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica failures.",
                    res['Message'])
        if replicaMetadata:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replica metadata to be updated."
                % len(replicaMetadata))
            # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
            res = self.storageDB.updateReplicaInformation(replicaMetadata)
            if not res['OK']:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica metadata.",
                    res['Message'])
        return S_OK()
Exemplo n.º 22
0
 def export_getProject(self):
   """ Return the project name configured for the local installation.

       The value is read from the /LocalInstallation/Project option of the
       configuration returned by loadDIRACCFG(), with "DIRAC" as fallback.

       :return: S_OK( project name ) or the failed result of loadDIRACCFG()
   """
   loaded = loadDIRACCFG()
   if loaded['OK']:
     _cfgLocation, localCFG = loaded['Value']
     project = localCFG.getOption("/LocalInstallation/Project", "DIRAC")
     return S_OK(project)
   return loaded
Exemplo n.º 23
0
 def setMetaKey(self, key):
   """ Remember the metadata key on this object.

       :param key: value stored as ``self.metaKey``
       :return: S_OK()
   """
   self.metaKey = key
   return S_OK()
Exemplo n.º 24
0
  def export_checkComponentLog(self, component):
    """ Check component log for errors

        :param component: '*' to check every component reported by
                          gComponentInstaller.getSetupComponents(), a single
                          "System/Component" string, or an iterable of such strings
        :return: S_OK( dict ) mapping each checked component to a dictionary with
                 'ErrorsHour', 'ErrorsDay' and 'LastError'; the two counters are -1
                 when the log file could not be opened
    """
    componentList = []
    if '*' in component:
      # Only the bare '*' wildcard is expanded; other patterns yield no components
      if component == '*':
        result = gComponentInstaller.getSetupComponents()
        if result['OK']:
          setup = result['Value']
          for ctype in ('Services', 'Agents', 'Executors'):
            for sname in setup.get(ctype, {}):
              for cname in setup[ctype][sname]:
                componentList.append('/'.join([sname, cname]))
    elif isinstance(component, basestring):
      componentList = [component]
    else:
      componentList = component

    resultDict = {}
    startDir = gComponentInstaller.startDir
    for comp in componentList:
      # Only names of the exact form "System/Component" map onto a log directory;
      # anything else is skipped instead of failing on tuple unpacking
      parts = comp.split('/')
      if len(parts) != 2:
        continue
      system, cname = parts

      currentLog = startDir + '/' + system + '_' + cname + '/log/current'
      try:
        logFile = open(currentLog, 'r')
      except IOError as err:
        gLogger.error("File does not exists:", currentLog)
        resultDict[comp] = {'ErrorsHour': -1, 'ErrorsDay': -1, 'LastError': currentLog + '::' + repr(err)}
        continue

      errors_1 = 0
      errors_24 = 0
      now = dateTime()
      lastError = ''
      # The context manager closes the log even if reading fails, and the file is
      # streamed line by line rather than loaded into memory as a whole
      with logFile:
        for line in logFile:
          if "ERROR:" not in line:
            continue
          message = line.split('ERROR:')[-1].strip()
          fields = line.split()
          timeStamp = None
          if len(fields) >= 2:
            timeStamp = fromString(fields[0] + ' ' + fields[1])
          if not timeStamp:
            # No usable timestamp: the error cannot be dated, so it is only
            # remembered as the last error and not counted
            lastError = message
            continue
          age = now - timeStamp
          withinHour = age < hour
          withinDay = age < day
          if withinHour:
            errors_1 += 1
          if withinDay:
            errors_24 += 1
          if withinHour or withinDay:
            lastError = message

      resultDict[comp] = {'ErrorsHour': errors_1, 'ErrorsDay': errors_24, 'LastError': lastError}

    return S_OK(resultDict)
Exemplo n.º 25
0
 def setSourceSE(self, sourceSE):
   """ Store the source SEs given as a comma-separated string.

       :param sourceSE: comma-separated names; surrounding whitespace of each
                        name is stripped
       :return: S_OK()
   """
   names = sourceSE.split(",")
   self.sourceSE = [name.strip() for name in names]
   return S_OK()
Exemplo n.º 26
0
  def __readHostInfo():
    """ Get host current loads, memory, etc

        Gathers memory and swap usage from /proc/meminfo, the load averages, CPU
        details from /proc/cpuinfo, disk occupancy from ``df``, the number of files,
        sockets and pipes opened by the current user from ``lsof``, the installation
        info from gComponentInstaller, the host certificate properties and the uptime.

        NOTE(review): takes no ``self``; presumably declared as a static method
        outside the lines shown here - confirm.

        :return: S_OK( dict ) with the collected values
    """

    result = dict()
    # Memory info
    re_parser = re.compile(r'^(?P<key>\S*):\s*(?P<value>\d*)\s*kB')
    with open('/proc/meminfo') as memFile:
      for line in memFile:
        match = re_parser.match(line)
        if not match:
          continue
        key, value = match.group('key', 'value')
        result[key] = int(value)

    for mtype, name in (('Mem', 'Memory'), ('Swap', 'Swap')):
      # A missing entry counts as 0 kB instead of aborting the whole report
      memory = int(result.get(mtype + 'Total', 0))
      mfree = int(result.get(mtype + 'Free', 0))
      if memory > 0:
        percentage = float(memory - mfree) / float(memory) * 100.
      else:
        percentage = 0
      result[name] = '%.1f%%/%.1fMB' % (percentage, memory / 1024.)

    # Loads
    l1, l5, l15 = (str(lx) for lx in os.getloadavg())
    result['Load1'] = l1
    result['Load5'] = l5
    result['Load15'] = l15
    result['Load'] = '/'.join([l1, l5, l15])

    # CPU info
    processors = 0
    physCores = set()
    with open('/proc/cpuinfo', 'r') as fd:
      for line in fd:
        if ':' not in line:
          # blank separator lines and anything that is not "name : value"
          continue
        # Split on the first colon only: the value itself may contain colons
        parameter, value = line.split(':', 1)
        parameter = parameter.strip()
        value = value.strip()
        if parameter.startswith('processor'):
          processors += 1
        if parameter.startswith('physical id'):
          physCores.add(value)
        if parameter.startswith('model name'):
          result['CPUModel'] = value
        if parameter.startswith('cpu MHz'):
          result['CPUClock'] = value
    result['Cores'] = processors
    result['PhysicalCores'] = len(physCores)

    # Disk occupancy
    occupancies = []
    _status, output = commands.getstatusoutput('df')
    lines = output.split('\n')
    for i, line in enumerate(lines):
      if line.startswith('/dev'):
        fields = line.split()
        # df wraps the entry onto the next line when the device name is long
        if len(fields) == 1 and i + 1 < len(lines):
          fields += lines[i + 1].split()
        if len(fields) < 6:
          continue
        partition = fields[5]
        occupancy = fields[4]
        occupancies.append("%s:%s" % (partition, occupancy))
    result['DiskOccupancy'] = ','.join(occupancies)
    result['RootDiskSpace'] = Os.getDiskSpace(rootPath)

    # Open files
    puser = getpass.getuser()
    _status, output = commands.getstatusoutput('lsof')
    pipes = 0
    files = 0
    sockets = 0
    for line in output.split('\n'):
      fields = line.split()
      if len(fields) < 5:
        # empty output, warnings or truncated lines carry no type column
        continue
      fType = fields[4]
      user = fields[2]
      if user == puser:
        if fType in ['REG']:
          files += 1
        elif fType in ['unix', 'IPv4']:
          sockets += 1
        elif fType in ['FIFO']:
          pipes += 1
    result['OpenSockets'] = sockets
    result['OpenFiles'] = files
    result['OpenPipes'] = pipes

    infoResult = gComponentInstaller.getInfo()
    if infoResult['OK']:
      result.update(infoResult['Value'])
      # the infoResult value is {"Extensions":{'a1':'v1',a2:'v2'}; we convert to a string
      extensions = infoResult["Value"].get('Extensions') or {}
      result["Extensions"] = ";".join(["%s:%s" % (key, value) for (key, value) in extensions.items()])

    # Host certificate properties
    certFile, _keyFile = getHostCertificateAndKeyLocation()
    chain = X509Chain()
    chain.loadChainFromFile(certFile)
    resultCert = chain.getCredentials()
    if resultCert['OK']:
      result['SecondsLeft'] = resultCert['Value']['secondsLeft']
      result['CertificateValidity'] = str(timedelta(seconds=resultCert['Value']['secondsLeft']))
      result['CertificateDN'] = resultCert['Value']['subject']
      result['HostProperties'] = resultCert['Value']['groupProperties']
      result['CertificateIssuer'] = resultCert['Value']['issuer']

    # Host uptime
    result['Uptime'] = str(timedelta(seconds=(time.time() - psutil.boot_time())))

    return S_OK(result)
Exemplo n.º 27
0
 def setTargetSE(self, targetSE):
   """ Store the target SEs given as a comma-separated string.

       :param targetSE: comma-separated names; surrounding whitespace of each
                        name is stripped
       :return: S_OK()
   """
   names = targetSE.split(",")
   self.targetSE = [name.strip() for name in names]
   return S_OK()
Exemplo n.º 28
0
    def initializeHandler(cls, serviceInfoDict):
        """Create the DataIntegrityDB object and attach it to the handler class.

        :param serviceInfoDict: service information; not used by this method
        :return: S_OK()
        """
        integrityDB = DataIntegrityDB(parentLogger=cls.log)
        cls.dataIntegrityDB = integrityDB
        return S_OK()
Exemplo n.º 29
0
 def setGroupSize(self, size):
   """ Set the group size from a value convertible to an integer.

       :param size: integer or string holding an integer
       :return: S_OK(), or S_ERROR if the value cannot be converted
   """
   try:
     self.groupSize = int(size)
   except (TypeError, ValueError):
     # int() raises ValueError for non-numeric strings and TypeError for
     # None or containers; both mean the caller did not pass an integer
     return S_ERROR("Expected integer for groupsize")
   return S_OK()
Exemplo n.º 30
0
    def getSEDefinition(self, seID):
        """Get the Storage Element definition, refreshing it from the CS when needed.

        A cached entry is returned as is when it was updated less than
        ``self.seUpdatePeriod`` seconds ago and its 'SEDict' is not empty.

        :param seID: SE identifier; a ``str`` is taken as the SE name and is
                     resolved with ``getSEID`` first
        :return: S_OK( dict with 'SEName', 'SEDict' and 'LastUpdate' ), or the
                 failed result of one of the lookups
        """
        if isinstance(seID, str):
            idLookup = self.getSEID(seID)
            if not idLookup['OK']:
                return idLookup
            seID = idLookup['Value']

        if seID not in self.db.seDefinitions:
            nameLookup = self.getSEName(seID)
            if not nameLookup['OK']:
                return nameLookup
            se = nameLookup['Value']
            # LastUpdate of 0 marks the fresh entry as needing a refresh
            self.db.seDefinitions[seID] = {'SEName': se, 'SEDict': {}, 'LastUpdate': 0.}
        else:
            cached = self.db.seDefinitions[seID]
            age = time.time() - cached['LastUpdate']
            if age < self.seUpdatePeriod and cached['SEDict']:
                return S_OK(cached)
            se = cached['SEName']

        definition = self.db.seDefinitions[seID]

        # We have to refresh the SE definition from the CS
        sections = gConfig.getSections('/Resources/StorageElements/%s' % se)
        if not sections['OK']:
            return sections
        pluginSection = sections['Value'][0]
        options = gConfig.getOptionsDict('/Resources/StorageElements/%s/%s' %
                                         (se, pluginSection))
        if not options['OK']:
            return options
        seDict = options['Value']
        definition['SEDict'] = seDict

        # Get VO paths if any
        voLookup = gConfig.getOptionsDict(
            '/Resources/StorageElements/%s/%s/VOPath' % (se, pluginSection))
        voPathDict = voLookup['Value'] if voLookup['OK'] else None

        if seDict:
            # A.T. Ports can be multiple, this can be better done using the Storage plugin
            # to provide the replica prefix to keep implementations in one place
            if 'Port' in seDict and ',' in seDict['Port']:
                candidates = [port.strip() for port in seDict['Port'].split(',')]
                random.shuffle(candidates)
                seDict['Port'] = candidates[0]
            tmpDict = dict(seDict)
            tmpDict['FileName'] = ''
            prefix = pfnunparse(tmpDict)
            if prefix['OK']:
                definition['SEDict']['PFNPrefix'] = prefix['Value']
            if voPathDict is not None:
                for vo, voPath in voPathDict.items():
                    tmpDict['Path'] = voPath
                    voPrefix = pfnunparse(tmpDict)
                    if voPrefix['OK']:
                        definition['SEDict'].setdefault(
                            "VOPrefix", {})[vo] = voPrefix['Value']

        definition['LastUpdate'] = time.time()
        return S_OK(definition)