def __updateSharedSESites( self, jobState, stageSite, stagedLFNs, opData ):
  siteCandidates = opData[ 'SiteCandidates' ]

  seStatus = {}
  result = jobState.getManifest()
  if not result['OK']:
    return result
  manifest = result['Value']
  vo = manifest.getOption( 'VirtualOrganization' )
  for siteName in siteCandidates:
    if siteName == stageSite:
      continue
    self.jobLog.verbose( "Checking %s for shared SEs" % siteName )
    siteData = siteCandidates[ siteName ]
    result = getSEsForSite( siteName )
    if not result[ 'OK' ]:
      continue
    closeSEs = result[ 'Value' ]
    diskSEs = []
    for seName in closeSEs:
      # If we don't have the SE status yet, retrieve and cache it
      if seName not in seStatus:
        seObj = StorageElement( seName, vo = vo )
        result = seObj.getStatus()
        if not result['OK']:
          self.jobLog.error( "Cannot retrieve SE %s status: %s" % ( seName, result[ 'Message' ] ) )
          continue
        seStatus[ seName ] = result[ 'Value' ]
      # Use the cached SE status and record the SE if it's a disk SE
      status = seStatus[ seName ]
      if status['Read'] and status['DiskSE']:
        diskSEs.append( seName )
    self.jobLog.verbose( "Disk SEs for %s are %s" % ( siteName, ", ".join( diskSEs ) ) )

    # Unwrap the doubly-nested result structure: opData['Value']['Value']['Successful']
    lfnData = opData['Value']['Value']['Successful']
    for seName in stagedLFNs:
      # If the SE is not close then skip it
      if seName not in closeSEs:
        continue
      for lfn in stagedLFNs[ seName ]:
        self.jobLog.verbose( "Checking %s for %s" % ( seName, lfn ) )
        # A staged LFN should always be present in the successful replica data
        if lfn not in lfnData:
          continue
        # Check if it's already on disk at the site
        onDisk = False
        for siteSE in lfnData[ lfn ]:
          if siteSE in diskSEs:
            self.jobLog.verbose( "%s on disk for %s" % ( lfn, siteSE ) )
            onDisk = True
        # If not on disk, move one file from the tape count to the disk count
        if not onDisk:
          self.jobLog.verbose( "Setting LFN to disk for %s" % seName )
          siteData[ 'disk' ] += 1
          siteData[ 'tape' ] -= 1
  return S_OK()
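# --- Illustrative sketch (not part of the original module) ---
# The methods in this section all follow DIRAC's S_OK/S_ERROR return convention:
# every call yields a dict with an 'OK' flag, carrying 'Value' on success and
# 'Message' on failure. A minimal stand-in for readers unfamiliar with DIRAC
# (the real functions live in the DIRAC package itself):

def S_OK_sketch( value = None ):
  # success: the payload travels in 'Value'
  return { 'OK': True, 'Value': value }

def S_ERROR_sketch( message = '' ):
  # failure: the reason travels in 'Message'
  return { 'OK': False, 'Message': message }

# Typical call-site pattern, as used throughout this section:
#   result = seObj.getStatus()
#   if not result['OK']:
#     return result            # propagate the error unchanged
#   seStatus = result['Value'] # unwrap the payload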
def __getSEStatus(self, seName):
    # Return the cached status if present; the cache returns False on a miss
    result = self.__SEStatus.get(seName)
    if result is False:
        seObj = StorageElement(seName)
        result = seObj.getStatus()
        if not result["OK"]:
            return result
        # Cache the status for 600 seconds
        self.__SEStatus.add(seName, 600, result)
    return result
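# --- Illustrative sketch (not part of the original module) ---
# __getSEStatus above assumes a DictCache-like object: get() returns False on a
# cache miss and add(key, ttl, value) stores a value with a lifetime in seconds.
# A minimal in-memory stand-in with that assumed interface (the real DIRAC
# DictCache also handles locking and purging):

import time

class TTLCacheSketch(object):
    def __init__(self):
        self.__store = {}  # key -> (expiry timestamp, value)

    def get(self, key):
        record = self.__store.get(key)
        if record is None or record[0] < time.time():
            return False  # miss or expired
        return record[1]

    def add(self, key, ttl, value):
        self.__store[key] = (time.time() + ttl, value)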
def __updateSharedSESites(self, jobState, stageSite, stagedLFNs, opData):
    siteCandidates = opData["SiteCandidates"]

    seStatus = {}
    for siteName in siteCandidates:
        if siteName == stageSite:
            continue
        self.jobLog.verbose("Checking %s for shared SEs" % siteName)
        siteData = siteCandidates[siteName]
        result = getSEsForSite(siteName)
        if not result["OK"]:
            continue
        closeSEs = result["Value"]
        diskSEs = []
        for seName in closeSEs:
            # If we don't have the SE status yet, retrieve and cache it
            if seName not in seStatus:
                seObj = StorageElement(seName)
                result = seObj.getStatus()
                if not result["OK"]:
                    self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result["Message"]))
                    continue
                seStatus[seName] = result["Value"]
            # Use the cached SE status and record the SE if it's a disk SE
            status = seStatus[seName]
            if status["Read"] and status["DiskSE"]:
                diskSEs.append(seName)
        self.jobLog.verbose("Disk SEs for %s are %s" % (siteName, ", ".join(diskSEs)))

        # Unwrap the doubly-nested result structure: opData["Value"]["Value"]["Successful"]
        lfnData = opData["Value"]["Value"]["Successful"]
        for seName in stagedLFNs:
            # If the SE is not close then skip it
            if seName not in closeSEs:
                continue
            for lfn in stagedLFNs[seName]:
                self.jobLog.verbose("Checking %s for %s" % (seName, lfn))
                # A staged LFN should always be present in the successful replica data
                if lfn not in lfnData:
                    continue
                # Check if it's already on disk at the site
                onDisk = False
                for siteSE in lfnData[lfn]:
                    if siteSE in diskSEs:
                        self.jobLog.verbose("%s on disk for %s" % (lfn, siteSE))
                        onDisk = True
                # If not on disk, move one file from the tape count to the disk count
                if not onDisk:
                    self.jobLog.verbose("Setting LFN to disk for %s" % seName)
                    siteData["disk"] += 1
                    siteData["tape"] -= 1
    return S_OK()
def __updateOtherSites( self, job, stagingSite, stagedLFNsPerSE, optInfo ):
  """ Update Optimizer Info for other sites for which the SEs on which we have staged
      files are declared local
  """
  updated = False
  seDict = {}
  for site, siteDict in optInfo['SiteCandidates'].items():
    if stagingSite == site:
      continue
    closeSEs = getSEsForSite( site )
    if not closeSEs['OK']:
      continue
    closeSEs = closeSEs['Value']
    siteDiskSEs = []
    for se in closeSEs:
      if se not in seDict:
        try:
          storageElement = StorageElement( se )
          seDict[se] = storageElement.getStatus()['Value']
        except Exception:
          self.log.exception( 'Failed to instantiate StorageElement( %s )' % se )
          continue
      seStatus = seDict[se]
      if seStatus['Read'] and seStatus['DiskSE']:
        siteDiskSEs.append( se )
    for lfn, replicas in optInfo['Value']['Value']['Successful'].items():
      for stageSE, stageLFNs in stagedLFNsPerSE.items():
        if lfn in stageLFNs and stageSE in closeSEs:
          # The LFN has been staged at an SE close to this site; check now
          # whether the LFN was not already on a disk SE at the site
          isOnDisk = False
          for se in replicas:
            if se in siteDiskSEs:
              isOnDisk = True
          if not isOnDisk:
            # This is updating optInfo
            updated = True
            siteDict['disk'] += 1
            siteDict['tape'] -= 1
          break
  if updated:
    self.log.verbose( 'Updating %s Optimizer Info for Job %s:' % ( self.dataAgentName, job ), optInfo )
    self.setOptimizerJobInfo( job, self.dataAgentName, optInfo )
def __updateOtherSites(self, job, stagingSite, stagedLFNsPerSE, optInfo):
    """Update Optimizer Info for other sites for which the SEs on which we have staged
    files are declared local
    """
    updated = False
    for site, siteDict in optInfo["SiteCandidates"].items():
        if stagingSite == site:
            continue
        closeSEs = getSEsForSite(site)
        if not closeSEs["OK"]:
            continue
        closeSEs = closeSEs["Value"]
        siteDiskSEs = []
        for se in closeSEs:
            storageElement = StorageElement(se)
            seStatus = storageElement.getStatus()["Value"]
            if seStatus["Read"] and seStatus["DiskSE"]:
                siteDiskSEs.append(se)
        for lfn, replicas in optInfo["Value"]["Value"]["Successful"].items():
            for stageSE, stageLFNs in stagedLFNsPerSE.items():
                if lfn in stageLFNs and stageSE in closeSEs:
                    # The LFN has been staged at an SE close to this site; check now
                    # whether the LFN was not already on a disk SE at the site
                    isOnDisk = False
                    for se in replicas:
                        if se in siteDiskSEs:
                            isOnDisk = True
                    if not isOnDisk:
                        # This is updating optInfo
                        updated = True
                        siteDict["disk"] += 1
                        siteDict["tape"] -= 1
                    break
    if updated:
        self.setOptimizerJobInfo(job, self.dataAgentName, optInfo)
def __preRequestStaging( self, jobState, stageSite, opData ):
  from DIRAC.DataManagementSystem.Utilities.DMSHelpers import DMSHelpers

  tapeSEs = []
  diskSEs = []
  result = jobState.getManifest()
  if not result['OK']:
    return result
  manifest = result['Value']
  vo = manifest.getOption( 'VirtualOrganization' )
  inputDataPolicy = manifest.getOption( 'InputDataPolicy', 'Protocol' )
  connectionLevel = 'DOWNLOAD' if 'download' in inputDataPolicy.lower() else 'PROTOCOL'
  # Allow staging from SEs accessible by protocol
  result = DMSHelpers( vo = vo ).getSEsForSite( stageSite, connectionLevel = connectionLevel )
  if not result['OK']:
    return S_ERROR( 'Could not determine SEs for site %s' % stageSite )
  siteSEs = result['Value']

  for seName in siteSEs:
    se = StorageElement( seName, vo = vo )
    result = se.getStatus()
    if not result[ 'OK' ]:
      self.jobLog.error( "Cannot retrieve SE %s status: %s" % ( seName, result[ 'Message' ] ) )
      return S_ERROR( "Cannot retrieve SE status" )
    seStatus = result[ 'Value' ]
    if seStatus[ 'Read' ] and seStatus[ 'TapeSE' ]:
      tapeSEs.append( seName )
    if seStatus[ 'Read' ] and seStatus[ 'DiskSE' ]:
      diskSEs.append( seName )
  if not tapeSEs:
    return S_ERROR( "No Local SEs for site %s" % stageSite )
  self.jobLog.verbose( "Tape SEs are %s" % ( ", ".join( tapeSEs ) ) )

  # Unwrap the doubly-nested result structure: opData['Value']['Value']['Successful']
  inputData = opData['Value']['Value']['Successful']
  stageLFNs = {}
  lfnToStage = []
  for lfn in inputData:
    replicas = inputData[ lfn ]
    # Check SEs
    seStage = []
    for seName in replicas:
      if seName in diskSEs:
        # This lfn is on disk. Skip it
        seStage = []
        break
      if seName not in tapeSEs:
        # This lfn is not in this tape SE. Check next SE
        continue
      seStage.append( seName )
    for seName in seStage:
      if seName not in stageLFNs:
        stageLFNs[ seName ] = []
      stageLFNs[ seName ].append( lfn )
      if lfn not in lfnToStage:
        lfnToStage.append( lfn )

  if not stageLFNs:
    return S_ERROR( "Cannot find tape replicas" )

  # Check if any LFN is in more than one SE.
  # If that's the case, try to stage from the SE that has more LFNs to stage, to group the request.
  # 1.- Get the SEs ordered by descending number of LFNs to stage. Materialise the
  #     list: a reversed() iterator would be exhausted after the first LFN.
  sortedSEs = sorted( [ ( len( stageLFNs[ seName ] ), seName ) for seName in stageLFNs ], reverse = True )
  for lfn in lfnToStage:
    found = False
    # 2.- Traverse the SEs
    for _stageCount, seName in sortedSEs:
      if lfn in stageLFNs.get( seName, [] ):
        # 3.- If first time found, just mark as found. Next time delete the replica from the request
        if found:
          stageLFNs[ seName ].remove( lfn )
        else:
          found = True
        # 4.- If the SE has no LFNs left to stage, drop it from the request
        if not stageLFNs[ seName ]:
          stageLFNs.pop( seName )

  return S_OK( stageLFNs )
def __preRequestStaging( self, jobState, stageSite, opData ):
  result = getSEsForSite( stageSite )
  if not result['OK']:
    return S_ERROR( 'Could not determine SEs for site %s' % stageSite )
  siteSEs = result['Value']

  tapeSEs = []
  diskSEs = []
  for seName in siteSEs:
    se = StorageElement( seName )
    result = se.getStatus()
    if not result[ 'OK' ]:
      self.jobLog.error( "Cannot retrieve SE %s status: %s" % ( seName, result[ 'Message' ] ) )
      return S_ERROR( "Cannot retrieve SE status" )
    seStatus = result[ 'Value' ]
    if seStatus[ 'Read' ] and seStatus[ 'TapeSE' ]:
      tapeSEs.append( seName )
    if seStatus[ 'Read' ] and seStatus[ 'DiskSE' ]:
      diskSEs.append( seName )
  if not tapeSEs:
    return S_ERROR( "No Local SEs for site %s" % stageSite )
  self.jobLog.verbose( "Tape SEs are %s" % ( ", ".join( tapeSEs ) ) )

  # Unwrap the doubly-nested result structure: opData['Value']['Value']['Successful']
  inputData = opData['Value']['Value']['Successful']
  stageLFNs = {}
  lfnToStage = []
  for lfn in inputData:
    replicas = inputData[ lfn ]
    # Check SEs
    seStage = []
    for seName in replicas:
      if seName in diskSEs:
        # This lfn is on disk. Skip it
        seStage = []
        break
      if seName not in tapeSEs:
        # This lfn is not in this tape SE. Check next SE
        continue
      seStage.append( seName )
    for seName in seStage:
      if seName not in stageLFNs:
        stageLFNs[ seName ] = []
      stageLFNs[ seName ].append( lfn )
      if lfn not in lfnToStage:
        lfnToStage.append( lfn )

  if not stageLFNs:
    return S_ERROR( "Cannot find tape replicas" )

  # Check if any LFN is in more than one SE.
  # If that's the case, try to stage from the SE that has more LFNs to stage, to group the request.
  # 1.- Get the SEs ordered by descending number of LFNs to stage. Materialise the
  #     list: a reversed() iterator would be exhausted after the first LFN.
  sortedSEs = sorted( [ ( len( stageLFNs[ seName ] ), seName ) for seName in stageLFNs ], reverse = True )
  for lfn in lfnToStage:
    found = False
    # 2.- Traverse the SEs
    for _stageCount, seName in sortedSEs:
      if lfn in stageLFNs.get( seName, [] ):
        # 3.- If first time found, just mark as found. Next time delete the replica from the request
        if found:
          stageLFNs[ seName ].remove( lfn )
        else:
          found = True
        # 4.- If the SE has no LFNs left to stage, drop it from the request
        if not stageLFNs[ seName ]:
          stageLFNs.pop( seName )

  return S_OK( stageLFNs )
def __getSiteCandidates( self, inputData ):
  """This method returns a list of possible site candidates based on the job input data
     requirement. For each site candidate, the number of files on disk and tape is resolved.
  """
  fileSEs = {}
  for lfn, replicas in inputData.items():
    siteList = []
    for se in replicas.keys():
      sites = self.__getSitesForSE( se )
      if sites['OK']:
        siteList += sites['Value']
    fileSEs[lfn] = uniqueElements( siteList )

  # Intersect the per-file site lists: a candidate site must see every LFN
  siteCandidates = []
  i = 0
  for fileName, sites in fileSEs.items():
    if not i:
      siteCandidates = sites
    else:
      tempSite = []
      for site in siteCandidates:
        if site in sites:
          tempSite.append( site )
      siteCandidates = tempSite
    i += 1

  if not len( siteCandidates ):
    return S_ERROR( 'No candidate sites available' )

  # In addition, check number of files on tape and disk for each site
  # for optimizations during scheduling
  siteResult = {}
  for site in siteCandidates:
    siteResult[site] = { 'disk': [], 'tape': [] }

  seDict = {}
  for lfn, replicas in inputData.items():
    for se in replicas.keys():
      if se not in seDict:
        sites = self.__getSitesForSE( se )
        if not sites['OK']:
          continue
        try:
          storageElement = StorageElement( se )
          seDict[se] = { 'Sites': sites['Value'], 'Status': storageElement.getStatus()['Value'] }
        except Exception:
          self.log.exception( 'Failed to instantiate StorageElement( %s )' % se )
          continue
      for site in seDict[se]['Sites']:
        if site in siteCandidates:
          if seDict[se]['Status']['Read'] and seDict[se]['Status']['DiskSE']:
            if lfn not in siteResult[site]['disk']:
              siteResult[site]['disk'].append( lfn )
          if seDict[se]['Status']['Read'] and seDict[se]['Status']['TapeSE']:
            if lfn not in siteResult[site]['tape']:
              siteResult[site]['tape'].append( lfn )

  for site in siteResult:
    siteResult[site]['disk'] = len( siteResult[site]['disk'] )
    siteResult[site]['tape'] = len( siteResult[site]['tape'] )
  return S_OK( siteResult )
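# --- Illustrative sketch (not part of the original module) ---
# The first loop of __getSiteCandidates above computes an intersection: a site
# is a candidate only if every LFN has at least one replica visible from it.
# With toy data, the same result falls out of set.intersection, which is the
# approach the later versions of this method use:

fileSEs_example = {
    '/vo/data/f1': ['LCG.CERN.ch', 'LCG.PIC.es'],
    '/vo/data/f2': ['LCG.CERN.ch', 'LCG.CNAF.it'],
}
candidates = set.intersection( *( set( sites ) for sites in fileSEs_example.values() ) )
print( candidates )  # {'LCG.CERN.ch'}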
def __submit( self, request, operation, toSubmit ):
  """ create and submit new FTSJobs using list of FTSFiles

  :param Request request: ReqDB.Request instance
  :param list toSubmit: list of FTSFile instances

  :return: [ FTSJob, FTSJob, ...]
  """
  log = self.log.getSubLogger( "req_%s/%s/submit" % ( request.RequestID, request.RequestName ) )

  # Group the files by (source SE, target SE) pair
  bySourceAndTarget = {}
  for ftsFile in toSubmit:
    bySourceAndTarget.setdefault( ftsFile.SourceSE, {} ).setdefault( ftsFile.TargetSE, [] ).append( ftsFile )

  ftsJobs = []
  for source, targetDict in bySourceAndTarget.iteritems():
    for target, ftsFileList in targetDict.iteritems():
      log.info( "found %s files to submit from %s to %s" % ( len( ftsFileList ), source, target ) )

      route = self.__ftsPlacement.findRoute( source, target )
      if not route["OK"]:
        log.error( route["Message"] )
        continue
      route = route["Value"]

      routeValid = self.__ftsPlacement.isRouteValid( route )
      if not routeValid['OK']:
        log.error( "Route invalid : %s" % routeValid['Message'] )
        continue

      sourceSE = StorageElement( source )
      sourceToken = sourceSE.getStorageParameters( "SRM2" )
      if not sourceToken["OK"]:
        log.error( "unable to get sourceSE parameters:", "(%s) %s" % ( source, sourceToken["Message"] ) )
        continue
      seStatus = sourceSE.getStatus()['Value']

      targetSE = StorageElement( target )
      targetToken = targetSE.getStorageParameters( "SRM2" )
      if not targetToken["OK"]:
        log.error( "unable to get targetSE parameters:", "(%s) %s" % ( target, targetToken["Message"] ) )
        continue

      # # create FTSJob
      for fileList in breakListIntoChunks( ftsFileList, self.MAX_FILES_PER_JOB ):
        ftsJob = FTSJob()
        ftsJob.RequestID = request.RequestID
        ftsJob.OperationID = operation.OperationID
        ftsJob.SourceSE = source
        ftsJob.TargetSE = target
        ftsJob.SourceToken = sourceToken["Value"].get( "SpaceToken", "" )
        ftsJob.TargetToken = targetToken["Value"].get( "SpaceToken", "" )
        ftsJob.FTSServer = route.ftsServer

        for ftsFile in fileList:
          ftsFile.Attempt += 1
          ftsFile.Error = ""
          ftsJob.addFile( ftsFile )

        submit = ftsJob.submitFTS( self.__ftsVersion, command = self.SUBMIT_COMMAND,
                                   pinTime = self.PIN_TIME if seStatus['TapeSE'] else 0 )
        if not submit["OK"]:
          log.error( "unable to submit FTSJob:", submit["Message"] )
          continue

        log.info( "FTSJob '%s'@'%s' has been submitted" % ( ftsJob.FTSGUID, ftsJob.FTSServer ) )

        # # update statuses for job files
        for ftsFile in ftsJob:
          ftsFile.FTSGUID = ftsJob.FTSGUID
          ftsFile.Status = "Submitted"
          ftsFile.Attempt += 1

        # # update placement route
        try:
          self.updateLock().acquire()
          self.__ftsPlacement.startTransferOnRoute( route )
        finally:
          self.updateLock().release()

        ftsJobs.append( ftsJob )

  log.info( "%s new FTSJobs have been submitted" % len( ftsJobs ) )
  return S_OK( ftsJobs )
def __preRequestStaging(self, jobState, stageSite, opData):
    from DIRAC.DataManagementSystem.Utilities.DMSHelpers import DMSHelpers

    tapeSEs = []
    diskSEs = []
    result = jobState.getManifest()
    if not result['OK']:
        return result
    manifest = result['Value']
    vo = manifest.getOption('VirtualOrganization')
    inputDataPolicy = manifest.getOption('InputDataPolicy', 'Protocol')
    connectionLevel = 'DOWNLOAD' if 'download' in inputDataPolicy.lower() else 'PROTOCOL'
    # Allow staging from SEs accessible by protocol
    result = DMSHelpers(vo=vo).getSEsForSite(stageSite, connectionLevel=connectionLevel)
    if not result['OK']:
        return S_ERROR('Could not determine SEs for site %s' % stageSite)
    siteSEs = result['Value']

    for seName in siteSEs:
        se = StorageElement(seName, vo=vo)
        result = se.getStatus()
        if not result['OK']:
            self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result['Message']))
            return S_ERROR("Cannot retrieve SE status")
        seStatus = result['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            tapeSEs.append(seName)
        if seStatus['Read'] and seStatus['DiskSE']:
            diskSEs.append(seName)
    if not tapeSEs:
        return S_ERROR("No Local SEs for site %s" % stageSite)
    self.jobLog.verbose("Tape SEs are %s" % (", ".join(tapeSEs)))

    # Unwrap the doubly-nested result structure: opData['Value']['Value']['Successful']
    inputData = opData['Value']['Value']['Successful']
    stageLFNs = {}
    lfnToStage = []
    for lfn in inputData:
        replicas = inputData[lfn]
        # Check SEs
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is on disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append(seName)
        for seName in seStage:
            if seName not in stageLFNs:
                stageLFNs[seName] = []
            stageLFNs[seName].append(lfn)
            if lfn not in lfnToStage:
                lfnToStage.append(lfn)

    if not stageLFNs:
        return S_ERROR("Cannot find tape replicas")

    # Check if any LFN is in more than one SE.
    # If that's the case, try to stage from the SE that has more LFNs to stage, to group the request.
    # 1.- Get the SEs ordered by descending number of LFNs to stage. Materialise the
    #     list: a reversed() iterator would be exhausted after the first LFN.
    sortedSEs = sorted([(len(stageLFNs[seName]), seName) for seName in stageLFNs], reverse=True)
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            if lfn in stageLFNs.get(seName, []):
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    stageLFNs[seName].remove(lfn)
                else:
                    found = True
                # 4.- If the SE has no LFNs left to stage, drop it from the request
                if not stageLFNs[seName]:
                    stageLFNs.pop(seName)

    return S_OK(stageLFNs)
def __setStagingRequest(self, job, destination, inputDataDict):
    """A Staging request is formulated and saved as a job optimizer parameter.
    """
    self.log.verbose('Destination site %s' % (destination))
    self.log.verbose('Input Data: %s' % (inputDataDict))

    destinationSEs = getSEsForSite(destination)
    if not destinationSEs['OK']:
        return S_ERROR('Could not determine SEs for site %s' % destination)
    destinationSEs = destinationSEs['Value']

    siteTapeSEs = []
    siteDiskSEs = []
    for se in destinationSEs:
        storageElement = StorageElement(se)
        seStatus = storageElement.getStatus()['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            siteTapeSEs.append(se)
        if seStatus['Read'] and seStatus['DiskSE']:
            siteDiskSEs.append(se)

    if not siteTapeSEs:
        return S_ERROR('No LocalSEs For Site')

    self.log.verbose('Site tape SEs: %s' % (', '.join(siteTapeSEs)))
    stageSURLs = {}  # OLD WAY
    stageLfns = {}  # NEW WAY

    inputData = inputDataDict['Value']['Value']['Successful']
    for lfn, reps in inputData.items():
        for se, surl in reps.items():
            if se in siteDiskSEs:
                # this file is on disk, we can ignore it
                break
            if se not in siteTapeSEs:
                # this file is not being staged
                continue
            if lfn not in stageSURLs:
                stageSURLs[lfn] = {}
            stageSURLs[lfn].update({se: surl})
            if se not in stageLfns:  # NEW WAY
                stageLfns[se] = []  # NEW WAY
            stageLfns[se].append(lfn)  # NEW WAY

    # Now we need to check if any LFN is in more than one SE
    if len(stageLfns) > 1:
        stageSEs = sorted([(len(stageLfns[se]), se) for se in stageLfns])
        for lfn in stageSURLs:
            lfnFound = False
            # Traverse from the SE with the most LFNs to stage to the one with the fewest
            for se in [item[1] for item in reversed(stageSEs)]:
                if lfnFound and lfn in stageLfns[se]:
                    stageLfns[se].remove(lfn)
                if lfn in stageLfns[se]:
                    lfnFound = True

    stagerClient = StorageManagerClient()
    request = stagerClient.setRequest(stageLfns, 'WorkloadManagement',
                                      'updateJobFromStager@WorkloadManagement/JobStateUpdate', job)
    if request['OK']:
        self.jobDB.setJobParameter(int(job), 'StageRequest', str(request['Value']))

    if not request['OK']:
        self.log.error('Problem sending Staging request:')
        self.log.error(request)
        return S_ERROR('Error Sending Staging Request')
    else:
        self.log.info('Staging request successfully sent')

    result = self.updateJobStatus(job, self.stagingStatus, self.stagingMinorStatus, "Unknown")
    if not result['OK']:
        return result
    return S_OK(stageLfns)
def __preRequestStaging(self, jobManifest, stageSite, opData):
    tapeSEs = []
    diskSEs = []
    vo = jobManifest.getOption("VirtualOrganization")
    inputDataPolicy = jobManifest.getOption("InputDataPolicy", "Protocol")
    connectionLevel = "DOWNLOAD" if "download" in inputDataPolicy.lower() else "PROTOCOL"
    # Allow staging from SEs accessible by protocol
    result = DMSHelpers(vo=vo).getSEsForSite(stageSite, connectionLevel=connectionLevel)
    if not result["OK"]:
        return S_ERROR("Could not determine SEs for site %s" % stageSite)
    siteSEs = result["Value"]

    for seName in siteSEs:
        se = StorageElement(seName, vo=vo)
        seStatus = se.getStatus()
        if not seStatus["OK"]:
            return seStatus
        seStatus = seStatus["Value"]
        if seStatus["Read"] and seStatus["TapeSE"]:
            tapeSEs.append(seName)
        if seStatus["Read"] and seStatus["DiskSE"]:
            diskSEs.append(seName)
    if not tapeSEs:
        return S_ERROR("No Local SEs for site %s" % stageSite)
    self.jobLog.debug("Tape SEs are %s" % (", ".join(tapeSEs)))

    # Unwrap the doubly-nested result structure: opData["Value"]["Value"]["Successful"]
    inputData = opData["Value"]["Value"]["Successful"]
    stageLFNs = {}
    lfnToStage = []
    for lfn in inputData:
        replicas = inputData[lfn]
        # Check SEs
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is on disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append(seName)
        for seName in seStage:
            if seName not in stageLFNs:
                stageLFNs[seName] = []
            stageLFNs[seName].append(lfn)
            if lfn not in lfnToStage:
                lfnToStage.append(lfn)

    if not stageLFNs:
        return S_ERROR("Cannot find tape replicas")

    # Check if any LFN is in more than one SE.
    # If that's the case, try to stage from the SE that has more LFNs to stage, to group the request.
    # 1.- Get the SEs ordered by descending number of LFNs to stage. Materialise the
    #     list: a reversed() iterator would be exhausted after the first LFN.
    sortedSEs = sorted([(len(stageLFNs[seName]), seName) for seName in stageLFNs], reverse=True)
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            if lfn in stageLFNs.get(seName, []):
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    stageLFNs[seName].remove(lfn)
                else:
                    found = True
                # 4.- If the SE has no LFNs left to stage, drop it from the request
                if not stageLFNs[seName]:
                    stageLFNs.pop(seName)

    return S_OK(stageLFNs)
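# --- Illustrative sketch (not part of the original module) ---
# How the staging-request grouping in __preRequestStaging behaves on toy data:
# an LFN replicated on several tape SEs is kept only at the SE that already has
# the most LFNs to stage, so stage requests get grouped per SE.

inputData_example = {
    "/vo/data/f1": ["Tape-SE-A"],
    "/vo/data/f2": ["Tape-SE-A", "Tape-SE-B"],  # duplicated tape replica
}
stageLFNs_example = {}
for lfn, seList in inputData_example.items():
    for seName in seList:
        stageLFNs_example.setdefault(seName, []).append(lfn)

sortedSEs_example = sorted(((len(lfns), se) for se, lfns in stageLFNs_example.items()), reverse=True)
for lfn in ["/vo/data/f1", "/vo/data/f2"]:
    found = False
    for _count, seName in sortedSEs_example:
        if lfn in stageLFNs_example.get(seName, []):
            if found:
                stageLFNs_example[seName].remove(lfn)  # drop the duplicate replica
            else:
                found = True
            if not stageLFNs_example[seName]:
                stageLFNs_example.pop(seName)  # SE has nothing left to stage

print(stageLFNs_example)  # {'Tape-SE-A': ['/vo/data/f1', '/vo/data/f2']}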
def __submit( self, request, operation, toSubmit ):
  """ create and submit new FTSJobs using list of FTSFiles

  :param Request request: ReqDB.Request instance
  :param list toSubmit: list of FTSFile instances

  :return: [ FTSJob, FTSJob, ...]
  """
  log = self.log.getSubLogger( "req_%s/%s/submit" % ( request.RequestID, request.RequestName ) )

  # Group the files by (source SE, target SE) pair
  bySourceAndTarget = {}
  for ftsFile in toSubmit:
    bySourceAndTarget.setdefault( ftsFile.SourceSE, {} ).setdefault( ftsFile.TargetSE, [] ).append( ftsFile )

  ftsJobs = []
  for source, targetDict in bySourceAndTarget.iteritems():
    for target, ftsFileList in targetDict.iteritems():
      log.info( "found %s files to submit from %s to %s" % ( len( ftsFileList ), source, target ) )

      route = self.__ftsPlacement.findRoute( source, target )
      if not route["OK"]:
        log.error( route["Message"] )
        continue
      route = route["Value"]

      routeValid = self.__ftsPlacement.isRouteValid( route )
      if not routeValid['OK']:
        log.error( "Route invalid : %s" % routeValid['Message'] )
        continue

      sourceSE = StorageElement( source )
      sourceToken = sourceSE.getStorageParameters( protocol = 'srm' )
      if not sourceToken["OK"]:
        log.error( "unable to get sourceSE parameters:", "(%s) %s" % ( source, sourceToken["Message"] ) )
        continue
      seStatus = sourceSE.getStatus()['Value']

      targetSE = StorageElement( target )
      targetToken = targetSE.getStorageParameters( protocol = 'srm' )
      if not targetToken["OK"]:
        log.error( "unable to get targetSE parameters:", "(%s) %s" % ( target, targetToken["Message"] ) )
        continue

      # # create FTSJob
      for fileList in breakListIntoChunks( ftsFileList, self.MAX_FILES_PER_JOB ):
        ftsJob = FTSJob()
        ftsJob.RequestID = request.RequestID
        ftsJob.OperationID = operation.OperationID
        ftsJob.SourceSE = source
        ftsJob.TargetSE = target
        ftsJob.SourceToken = sourceToken["Value"].get( "SpaceToken", "" )
        ftsJob.TargetToken = targetToken["Value"].get( "SpaceToken", "" )
        ftsJob.FTSServer = route.ftsServer

        for ftsFile in fileList:
          ftsFile.Attempt += 1
          ftsFile.Error = ""
          ftsJob.addFile( ftsFile )

        submit = ftsJob.submitFTS( self.__ftsVersion, command = self.SUBMIT_COMMAND,
                                   pinTime = self.PIN_TIME if seStatus['TapeSE'] else 0 )
        if not submit["OK"]:
          log.error( "unable to submit FTSJob:", submit["Message"] )
          continue

        log.info( "FTSJob '%s'@'%s' has been submitted" % ( ftsJob.FTSGUID, ftsJob.FTSServer ) )

        # # update statuses for job files
        for ftsFile in ftsJob:
          ftsFile.FTSGUID = ftsJob.FTSGUID
          ftsFile.Status = "Submitted"
          ftsFile.Attempt += 1

        # # update placement route
        try:
          self.updateLock().acquire()
          self.__ftsPlacement.startTransferOnRoute( route )
        finally:
          self.updateLock().release()

        ftsJobs.append( ftsJob )

  log.info( "%s new FTSJobs have been submitted" % len( ftsJobs ) )
  return S_OK( ftsJobs )
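# --- Illustrative sketch (not part of the original module) ---
# __submit relies on DIRAC's breakListIntoChunks (DIRAC.Core.Utilities.List) to
# split the files of one source/target pair into FTS jobs of at most
# MAX_FILES_PER_JOB files. For a plain list, its behaviour amounts to:

def break_list_into_chunks_sketch( items, chunkSize ):
  # yield successive slices of at most chunkSize elements
  for start in range( 0, len( items ), chunkSize ):
    yield items[start:start + chunkSize]

# e.g. 250 files with MAX_FILES_PER_JOB = 100 become jobs of 100, 100 and 50 files:
print( [ len( chunk ) for chunk in break_list_into_chunks_sketch( range( 250 ), 100 ) ] )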
def __requestStaging( self, jobState, stageSite, opData ):
  result = getSEsForSite( stageSite )
  if not result['OK']:
    return S_ERROR( 'Could not determine SEs for site %s' % stageSite )
  siteSEs = result['Value']

  tapeSEs = []
  diskSEs = []
  for seName in siteSEs:
    se = StorageElement( seName )
    result = se.getStatus()
    if not result[ 'OK' ]:
      self.jobLog.error( "Cannot retrieve SE %s status: %s" % ( seName, result[ 'Message' ] ) )
      return S_ERROR( "Cannot retrieve SE status" )
    seStatus = result[ 'Value' ]
    if seStatus[ 'Read' ] and seStatus[ 'TapeSE' ]:
      tapeSEs.append( seName )
    if seStatus[ 'Read' ] and seStatus[ 'DiskSE' ]:
      diskSEs.append( seName )
  if not tapeSEs:
    return S_ERROR( "No Local SEs for site %s" % stageSite )
  self.jobLog.verbose( "Tape SEs are %s" % ( ", ".join( tapeSEs ) ) )

  # Unwrap the doubly-nested result structure: opData['Value']['Value']['Successful']
  inputData = opData['Value']['Value']['Successful']
  stageLFNs = {}
  lfnToStage = []
  for lfn in inputData:
    replicas = inputData[ lfn ]
    # Check SEs
    seStage = []
    for seName in replicas:
      if seName in diskSEs:
        # This lfn is on disk. Skip it
        seStage = []
        break
      if seName not in tapeSEs:
        # This lfn is not in this tape SE. Check next SE
        continue
      seStage.append( seName )
    for seName in seStage:
      if seName not in stageLFNs:
        stageLFNs[ seName ] = []
      stageLFNs[ seName ].append( lfn )
      if lfn not in lfnToStage:
        lfnToStage.append( lfn )

  if not stageLFNs:
    return S_ERROR( "Cannot find tape replicas" )

  # Check if any LFN is in more than one SE.
  # If that's the case, try to stage from the SE that has more LFNs to stage, to group the request.
  # 1.- Get the SEs ordered by descending number of LFNs to stage. Materialise the
  #     list: a reversed() iterator would be exhausted after the first LFN.
  sortedSEs = sorted( [ ( len( stageLFNs[ seName ] ), seName ) for seName in stageLFNs ], reverse = True )
  for lfn in lfnToStage:
    found = False
    # 2.- Traverse the SEs
    for _stageCount, seName in sortedSEs:
      if lfn in stageLFNs.get( seName, [] ):
        # 3.- If first time found, just mark as found. Next time delete the replica from the request
        if found:
          stageLFNs[ seName ].remove( lfn )
        else:
          found = True
        # 4.- If the SE has no LFNs left to stage, drop it from the request
        if not stageLFNs[ seName ]:
          stageLFNs.pop( seName )

  self.jobLog.verbose( "Stage request will be \n\t%s" %
                       "\n\t".join( [ "%s:%s" % ( seName, stageLFNs[ seName ] ) for seName in stageLFNs ] ) )

  stagerClient = StorageManagerClient()
  result = stagerClient.setRequest( stageLFNs, 'WorkloadManagement',
                                    'updateJobFromStager@WorkloadManagement/JobStateUpdate',
                                    int( jobState.jid ) )
  if not result[ 'OK' ]:
    self.jobLog.error( "Could not send stage request: %s" % result[ 'Message' ] )
    return S_ERROR( "Problem sending staging request" )

  rid = str( result[ 'Value' ] )
  self.jobLog.info( "Stage request %s sent" % rid )
  jobState.setParameter( "StageRequest", rid )
  result = jobState.setStatus( self.ex_getOption( 'StagingStatus', 'Staging' ),
                               self.ex_getOption( 'StagingMinorStatus', 'Request Sent' ),
                               appStatus = "",
                               source = self.ex_optimizerName() )
  if not result[ 'OK' ]:
    return result
  return S_OK( stageLFNs )
def __setStagingRequest(self, job, destination, inputDataDict):
    """A Staging request is formulated and saved as a job optimizer parameter."""

    self.log.verbose("Destination site %s" % (destination))
    self.log.verbose("Input Data: %s" % (inputDataDict))

    destinationSEs = getSEsForSite(destination)
    if not destinationSEs["OK"]:
        return S_ERROR("Could not determine SEs for site %s" % destination)
    destinationSEs = destinationSEs["Value"]

    siteTapeSEs = []
    siteDiskSEs = []
    for se in destinationSEs:
        storageElement = StorageElement(se)
        seStatus = storageElement.getStatus()["Value"]
        if seStatus["Read"] and seStatus["TapeSE"]:
            siteTapeSEs.append(se)
        if seStatus["Read"] and seStatus["DiskSE"]:
            siteDiskSEs.append(se)

    if not siteTapeSEs:
        return S_ERROR("No LocalSEs For Site")

    self.log.verbose("Site tape SEs: %s" % (", ".join(siteTapeSEs)))
    stageSURLs = {}  # OLD WAY
    stageLfns = {}  # NEW WAY

    inputData = inputDataDict["Value"]["Value"]["Successful"]
    for lfn, reps in inputData.items():
        for se, surl in reps.items():
            if se in siteDiskSEs:
                # this file is on disk, we can ignore it
                break
            if se not in siteTapeSEs:
                # this file is not being staged
                continue
            if lfn not in stageSURLs:
                stageSURLs[lfn] = {}
            stageSURLs[lfn].update({se: surl})
            if se not in stageLfns:  # NEW WAY
                stageLfns[se] = []  # NEW WAY
            stageLfns[se].append(lfn)  # NEW WAY

    # Now we need to check if any LFN is in more than one SE
    if len(stageLfns) > 1:
        stageSEs = sorted([(len(stageLfns[se]), se) for se in stageLfns])
        for lfn in stageSURLs:
            lfnFound = False
            # Traverse from the SE with the most LFNs to stage to the one with the fewest
            for (_numberOfLfns, se) in reversed(stageSEs):
                if lfnFound and lfn in stageLfns[se]:
                    stageLfns[se].remove(lfn)
                if lfn in stageLfns[se]:
                    lfnFound = True

    stagerClient = StorageManagerClient()
    request = stagerClient.setRequest(
        stageLfns, "WorkloadManagement", "updateJobFromStager@WorkloadManagement/JobStateUpdate", job
    )
    if request["OK"]:
        self.jobDB.setJobParameter(int(job), "StageRequest", str(request["Value"]))

    if not request["OK"]:
        self.log.error("Problem sending Staging request:")
        self.log.error(request)
        return S_ERROR("Error Sending Staging Request")
    else:
        self.log.info("Staging request successfully sent")

    result = self.updateJobStatus(job, self.stagingStatus, self.stagingMinorStatus)
    if not result["OK"]:
        return result
    return S_OK(stageLfns)
def __updateSharedSESites(self, jobState, stageSite, stagedLFNs, opData):
    siteCandidates = opData['SiteCandidates']

    seStatus = {}
    result = jobState.getManifest()
    if not result['OK']:
        return result
    manifest = result['Value']
    vo = manifest.getOption('VirtualOrganization')
    for siteName in siteCandidates:
        if siteName == stageSite:
            continue
        self.jobLog.verbose("Checking %s for shared SEs" % siteName)
        siteData = siteCandidates[siteName]
        result = getSEsForSite(siteName)
        if not result['OK']:
            continue
        closeSEs = result['Value']
        diskSEs = []
        for seName in closeSEs:
            # If we don't have the SE status yet, retrieve and cache it
            if seName not in seStatus:
                seObj = StorageElement(seName, vo=vo)
                result = seObj.getStatus()
                if not result['OK']:
                    self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result['Message']))
                    continue
                seStatus[seName] = result['Value']
            # Use the cached SE status and record the SE if it's a disk SE
            status = seStatus[seName]
            if status['Read'] and status['DiskSE']:
                diskSEs.append(seName)
        self.jobLog.verbose("Disk SEs for %s are %s" % (siteName, ", ".join(diskSEs)))

        # Unwrap the doubly-nested result structure: opData['Value']['Value']['Successful']
        lfnData = opData['Value']['Value']['Successful']
        for seName in stagedLFNs:
            # If the SE is not close then skip it
            if seName not in closeSEs:
                continue
            for lfn in stagedLFNs[seName]:
                self.jobLog.verbose("Checking %s for %s" % (seName, lfn))
                # A staged LFN should always be present in the successful replica data
                if lfn not in lfnData:
                    continue
                # Check if it's already on disk at the site
                onDisk = False
                for siteSE in lfnData[lfn]:
                    if siteSE in diskSEs:
                        self.jobLog.verbose("%s on disk for %s" % (lfn, siteSE))
                        onDisk = True
                # If not on disk, move one file from the tape count to the disk count
                if not onDisk:
                    self.jobLog.verbose("Setting LFN to disk for %s" % seName)
                    siteData['disk'] += 1
                    siteData['tape'] -= 1
    return S_OK()
def __requestStaging( self, jobState, stageSite, opData ):
  result = getSEsForSite( stageSite )
  if not result['OK']:
    return S_ERROR( 'Could not determine SEs for site %s' % stageSite )
  siteSEs = result['Value']

  tapeSEs = []
  diskSEs = []
  for seName in siteSEs:
    se = StorageElement( seName )
    result = se.getStatus()
    if not result[ 'OK' ]:
      self.jobLog.error( "Cannot retrieve SE %s status: %s" % ( seName, result[ 'Message' ] ) )
      return S_ERROR( "Cannot retrieve SE status" )
    seStatus = result[ 'Value' ]
    if seStatus[ 'Read' ] and seStatus[ 'TapeSE' ]:
      tapeSEs.append( seName )
    if seStatus[ 'Read' ] and seStatus[ 'DiskSE' ]:
      diskSEs.append( seName )
  if not tapeSEs:
    return S_ERROR( "No Local SEs for site %s" % stageSite )
  self.jobLog.verbose( "Tape SEs are %s" % ( ", ".join( tapeSEs ) ) )

  # Unwrap the doubly-nested result structure: opData['Value']['Value']['Successful']
  inputData = opData['Value']['Value']['Successful']
  stageLFNs = {}
  lfnToStage = []
  for lfn in inputData:
    replicas = inputData[ lfn ]
    # Check SEs
    seStage = []
    for seName in replicas:
      if seName in diskSEs:
        # This lfn is on disk. Skip it
        seStage = []
        break
      if seName not in tapeSEs:
        # This lfn is not in this tape SE. Check next SE
        continue
      seStage.append( seName )
    for seName in seStage:
      if seName not in stageLFNs:
        stageLFNs[ seName ] = []
      stageLFNs[ seName ].append( lfn )
      if lfn not in lfnToStage:
        lfnToStage.append( lfn )

  if not stageLFNs:
    return S_ERROR( "Cannot find tape replicas" )

  # Check if any LFN is in more than one SE.
  # If that's the case, try to stage from the SE that has more LFNs to stage, to group the request.
  # 1.- Get the SEs ordered by descending number of LFNs to stage. Materialise the
  #     list: a reversed() iterator would be exhausted after the first LFN.
  sortedSEs = sorted( [ ( len( stageLFNs[ seName ] ), seName ) for seName in stageLFNs ], reverse = True )
  for lfn in lfnToStage:
    found = False
    # 2.- Traverse the SEs
    for _stageCount, seName in sortedSEs:
      if lfn in stageLFNs.get( seName, [] ):
        # 3.- If first time found, just mark as found. Next time delete the replica from the request
        if found:
          stageLFNs[ seName ].remove( lfn )
        else:
          found = True
        # 4.- If the SE has no LFNs left to stage, drop it from the request
        if not stageLFNs[ seName ]:
          stageLFNs.pop( seName )

  self.jobLog.verbose( "Stage request will be \n\t%s" %
                       "\n\t".join( [ "%s:%s" % ( seName, stageLFNs[ seName ] ) for seName in stageLFNs ] ) )

  stagerClient = StorageManagerClient()
  result = jobState.setStatus( self.ex_getOption( 'StagingStatus', 'Staging' ),
                               self.ex_getOption( 'StagingMinorStatus', 'Request To Be Sent' ),
                               appStatus = "",
                               source = self.ex_optimizerName() )
  if not result[ 'OK' ]:
    return result

  result = stagerClient.setRequest( stageLFNs, 'WorkloadManagement',
                                    'updateJobFromStager@WorkloadManagement/JobStateUpdate',
                                    int( jobState.jid ) )
  if not result[ 'OK' ]:
    self.jobLog.error( "Could not send stage request: %s" % result[ 'Message' ] )
    return S_ERROR( "Problem sending staging request" )

  rid = str( result[ 'Value' ] )
  self.jobLog.info( "Stage request %s sent" % rid )
  jobState.setParameter( "StageRequest", rid )
  result = jobState.setStatus( self.ex_getOption( 'StagingStatus', 'Staging' ),
                               self.ex_getOption( 'StagingMinorStatus', 'Request Sent' ),
                               appStatus = "",
                               source = self.ex_optimizerName() )
  if not result[ 'OK' ]:
    return result
  return S_OK( stageLFNs )
def __getSiteCandidates(self, okReplicas, vo):
    """
    This method returns a list of possible site candidates based on the job input data requirement.

    For each site candidate, the number of files on disk and tape is resolved.
    """
    lfnSEs = {}
    for lfn in okReplicas:
        replicas = okReplicas[lfn]
        siteSet = set()
        for seName in replicas:
            result = self.__getSitesForSE(seName)
            if not result['OK']:
                self.jobLog.warn("Could not get sites for SE", "%s: %s" % (seName, result['Message']))
                return result
            siteSet.update(result['Value'])
        lfnSEs[lfn] = siteSet

    if not lfnSEs:
        return S_ERROR(JobMinorStatus.NO_CANDIDATE_SITE_FOUND)

    # This makes an intersection of all sets in the dictionary and returns a set with it
    siteCandidates = set.intersection(*[lfnSEs[lfn] for lfn in lfnSEs])

    if not siteCandidates:
        return S_ERROR(JobMinorStatus.NO_CANDIDATE_SITE_FOUND)

    # In addition, check number of files on tape and disk for each site
    # for optimizations during scheduling
    sitesData = {}
    for siteName in siteCandidates:
        sitesData[siteName] = {'disk': set(), 'tape': set()}

    # Loop time!
    seDict = {}
    for lfn in okReplicas:
        replicas = okReplicas[lfn]
        # Check each SE in the replicas
        for seName in replicas:
            # If not already "loaded", add it to the dict
            if seName not in seDict:
                result = self.__getSitesForSE(seName)
                if not result['OK']:
                    self.jobLog.warn("Could not get sites for SE", "%s: %s" % (seName, result['Message']))
                    continue
                siteList = result['Value']
                seObj = StorageElement(seName, vo=vo)
                result = seObj.getStatus()
                if not result['OK']:
                    self.jobLog.error("Failed to get SE status", result['Message'])
                    return result
                seDict[seName] = {'Sites': siteList, 'Status': result['Value']}
            # Get SE info from the dict
            seData = seDict[seName]
            siteList = seData['Sites']
            seStatus = seData['Status']
            for siteName in siteList:
                # If not a candidate site then skip it
                if siteName not in siteCandidates:
                    continue
                # Add the LFN to the disk/tape sets
                diskLFNs = sitesData[siteName]['disk']
                tapeLFNs = sitesData[siteName]['tape']
                if seStatus['DiskSE']:
                    # Sets contain only unique elements, no need to check if it's there
                    diskLFNs.add(lfn)
                    if lfn in tapeLFNs:
                        tapeLFNs.remove(lfn)
                if seStatus['TapeSE']:
                    if lfn not in diskLFNs:
                        tapeLFNs.add(lfn)

    for siteName in sitesData:
        sitesData[siteName]['disk'] = len(sitesData[siteName]['disk'])
        sitesData[siteName]['tape'] = len(sitesData[siteName]['tape'])
    return S_OK(sitesData)
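# --- Illustrative sketch (not part of the original module) ---
# The disk/tape bookkeeping in __getSiteCandidates counts each LFN at most once
# per site, and a disk replica always wins over a tape replica at the same site.
# A toy replay of the same rules (the 'kind' flags stand in for the SE status):

sitesData_example = {'LCG.CERN.ch': {'disk': set(), 'tape': set()}}
replica_kinds = [('/vo/data/f1', 'tape'), ('/vo/data/f1', 'disk'), ('/vo/data/f2', 'tape')]
for lfn, kind in replica_kinds:
    diskLFNs = sitesData_example['LCG.CERN.ch']['disk']
    tapeLFNs = sitesData_example['LCG.CERN.ch']['tape']
    if kind == 'disk':
        diskLFNs.add(lfn)
        tapeLFNs.discard(lfn)  # a disk replica supersedes a tape one
    elif lfn not in diskLFNs:
        tapeLFNs.add(lfn)

print(sitesData_example)  # f1 counted on disk only, f2 on tape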
def __setStagingRequest( self, job, destination, inputDataDict ):
  """A Staging request is formulated and saved as a job optimizer parameter.
  """
  self.log.verbose( 'Destination site %s' % ( destination ) )
  self.log.verbose( 'Input Data: %s' % ( inputDataDict ) )

  destinationSEs = getSEsForSite( destination )
  if not destinationSEs['OK']:
    return S_ERROR( 'Could not determine SEs for site %s' % destination )
  destinationSEs = destinationSEs['Value']

  siteTapeSEs = []
  siteDiskSEs = []
  for se in destinationSEs:
    storageElement = StorageElement( se )
    seStatus = storageElement.getStatus()['Value']
    if seStatus['Read'] and seStatus['TapeSE']:
      siteTapeSEs.append( se )
    if seStatus['Read'] and seStatus['DiskSE']:
      siteDiskSEs.append( se )

  if not siteTapeSEs:
    return S_ERROR( 'No LocalSEs For Site' )

  self.log.verbose( 'Site tape SEs: %s' % ( ', '.join( siteTapeSEs ) ) )
  stageSURLs = {}  # OLD WAY
  stageLfns = {}  # NEW WAY

  inputData = inputDataDict['Value']['Value']['Successful']
  for lfn, reps in inputData.items():
    for se, surl in reps.items():
      if se in siteDiskSEs:
        # this file is on disk, we can ignore it
        break
      if se not in siteTapeSEs:
        # this file is not being staged
        continue
      if lfn not in stageSURLs:
        stageSURLs[lfn] = {}
      stageSURLs[lfn].update( { se: surl } )
      if se not in stageLfns:  # NEW WAY
        stageLfns[se] = []  # NEW WAY
      stageLfns[se].append( lfn )  # NEW WAY

  # Now we need to check if any LFN is in more than one SE
  if len( stageLfns ) > 1:
    stageSEs = sorted( [ ( len( stageLfns[se] ), se ) for se in stageLfns ] )
    for lfn in stageSURLs:
      lfnFound = False
      # Traverse from the SE with the most LFNs to stage to the one with the fewest
      for se in [ item[1] for item in reversed( stageSEs ) ]:
        if lfnFound and lfn in stageLfns[se]:
          stageLfns[se].remove( lfn )
        if lfn in stageLfns[se]:
          lfnFound = True

  stagerClient = StorageManagerClient()
  request = stagerClient.setRequest( stageLfns, 'WorkloadManagement',
                                     'updateJobFromStager@WorkloadManagement/JobStateUpdate', job )
  if request['OK']:
    self.jobDB.setJobParameter( int( job ), 'StageRequest', str( request['Value'] ) )

  if not request['OK']:
    self.log.error( 'Problem sending Staging request:' )
    self.log.error( request )
    return S_ERROR( 'Error Sending Staging Request' )
  else:
    self.log.info( 'Staging request successfully sent' )

  result = self.updateJobStatus( job, self.stagingStatus, self.stagingMinorStatus, "Unknown" )
  if not result['OK']:
    return result
  return S_OK( stageLfns )
def _checkFilesToStage(seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,
                       checkOnlyTapeSEs=None, jobLog=None,
                       proxyUserName=None, proxyUserGroup=None, executionLock=None):
    """
    Checks on SEs whether the file is NEARLINE or ONLINE
    onlineLFNs, offlineLFNs and absentLFNs are modified to contain the files found online
    If checkOnlyTapeSEs is True, disk replicas are not checked
    As soon as a replica is found Online for a file, no further check is made
    """
    # Only check on storage if it is a tape SE
    if jobLog is None:
        logger = gLogger
    else:
        logger = jobLog
    if checkOnlyTapeSEs is None:
        # Default value is True
        checkOnlyTapeSEs = True

    failed = {}
    for se, lfnsInSEList in seToLFNs.iteritems():
        # If we have already found all files online at another SE, no need to check the others,
        # but we still want to set the SE as Online if it is not a TapeSE
        vo = getVOForGroup(proxyUserGroup)
        seObj = StorageElement(se, vo=vo)
        status = seObj.getStatus()
        if not status['OK']:
            return status
        tapeSE = status['Value']['TapeSE']
        diskSE = status['Value']['DiskSE']
        # If requested to check only Tape SEs and the file is at a diskSE, we guess it is Online...
        filesToCheck = []
        for lfn in lfnsInSEList:
            # If the file had already been found accessible at an SE, only check that this one is on disk
            diskIsOK = checkOnlyTapeSEs or (lfn in onlineLFNs)
            if diskIsOK and diskSE:
                onlineLFNs.setdefault(lfn, []).append(se)
            elif not diskIsOK or (tapeSE and (lfn not in onlineLFNs)):
                filesToCheck.append(lfn)
        if not filesToCheck:
            continue

        # Wrap the SE method with executeWithUserProxy
        fileMetadata = (executeWithUserProxy(seObj.getFileMetadata)(
            filesToCheck, proxyUserName=proxyUserName,
            proxyUserGroup=proxyUserGroup, executionLock=executionLock))

        if not fileMetadata['OK']:
            failed[se] = dict.fromkeys(filesToCheck, fileMetadata['Message'])
        else:
            if fileMetadata['Value']['Failed']:
                failed[se] = fileMetadata['Value']['Failed']
            # is there at least one replica online?
            for lfn, mDict in fileMetadata['Value']['Successful'].iteritems():
                # SRM returns Cached, but others may only return Accessible
                if mDict.get('Cached', mDict['Accessible']):
                    onlineLFNs.setdefault(lfn, []).append(se)
                elif tapeSE:
                    # A file can be staged only at a Tape SE
                    offlineLFNs.setdefault(lfn, []).append(se)
                else:
                    # File not available at a diskSE... we shall retry later
                    pass

    # Doesn't matter if some files are Offline if they are also Online
    for lfn in set(offlineLFNs) & set(onlineLFNs):
        offlineLFNs.pop(lfn)

    # If the file was found staged, ignore possible errors, but print out errors
    for se, failedLfns in failed.items():
        logger.error("Errors when getting files metadata", 'at %s' % se)
        for lfn, reason in failedLfns.items():
            if lfn in onlineLFNs:
                logger.warn(reason, 'for %s, but there is an online replica' % lfn)
                failed[se].pop(lfn)
            else:
                logger.error(reason, 'for %s, no online replicas' % lfn)
                if cmpError(reason, errno.ENOENT):
                    absentLFNs.setdefault(lfn, []).append(se)
                    failed[se].pop(lfn)
        if not failed[se]:
            failed.pop(se)

    # Find the files that do not exist at SE
    if failed:
        logger.error("Error getting metadata",
                     "for %d files" % len(set(lfn for lfnList in failed.itervalues() for lfn in lfnList)))

    # Format the error for absent files
    for lfn in absentLFNs:
        seList = absentLFNs[lfn]
        # FIXME: it is not possible to return here an S_ERROR(), return the message only
        absentLFNs[lfn] = S_ERROR(errno.ENOENT, "File not at %s" % ','.join(seList))['Message']
    return S_OK()
def __getSiteCandidates( self, okReplicas ):
  """This method returns a list of possible site candidates based on the job input data
     requirement. For each site candidate, the number of files on disk and tape is resolved.
  """
  lfnSEs = {}
  for lfn in okReplicas:
    replicas = okReplicas[ lfn ]
    siteSet = set()
    for seName in replicas:
      result = self.__getSitesForSE( seName )
      if result['OK']:
        siteSet.update( result['Value'] )
    lfnSEs[ lfn ] = siteSet

  if not lfnSEs:
    return S_ERROR( "No candidate sites available" )

  # This makes an intersection of all sets in the dictionary and returns a set with it
  siteCandidates = set.intersection( *[ lfnSEs[ lfn ] for lfn in lfnSEs ] )

  if not siteCandidates:
    return S_ERROR( 'No candidate sites available' )

  # In addition, check number of files on tape and disk for each site
  # for optimizations during scheduling
  sitesData = {}
  for siteName in siteCandidates:
    sitesData[ siteName ] = { 'disk': set(), 'tape': set() }

  # Loop time!
  seDict = {}
  for lfn in okReplicas:
    replicas = okReplicas[ lfn ]
    # Check each SE in the replicas
    for seName in replicas:
      # If not already "loaded", add it to the dict
      if seName not in seDict:
        result = self.__getSitesForSE( seName )
        if not result['OK']:
          self.jobLog.warn( "Could not get sites for SE %s: %s" % ( seName, result[ 'Message' ] ) )
          continue
        siteList = result[ 'Value' ]
        seObj = StorageElement( seName )
        result = seObj.getStatus()
        if not result[ 'OK' ]:
          self.jobLog.error( "Could not retrieve status for SE %s: %s" % ( seName, result[ 'Message' ] ) )
          continue
        seStatus = result[ 'Value' ]
        seDict[ seName ] = { 'Sites': siteList, 'Status': seStatus }
      # Get SE info from the dict
      seData = seDict[ seName ]
      siteList = seData[ 'Sites' ]
      seStatus = seData[ 'Status' ]
      for siteName in siteList:
        # If not a candidate site then skip it
        if siteName not in siteCandidates:
          continue
        # Add the LFN to the disk/tape sets
        diskLFNs = sitesData[ siteName ][ 'disk' ]
        tapeLFNs = sitesData[ siteName ][ 'tape' ]
        if seStatus[ 'DiskSE' ]:
          # Sets contain only unique elements, no need to check if it's there
          diskLFNs.add( lfn )
          if lfn in tapeLFNs:
            tapeLFNs.remove( lfn )
        if seStatus[ 'TapeSE' ]:
          if lfn not in diskLFNs:
            tapeLFNs.add( lfn )

  for siteName in sitesData:
    sitesData[siteName]['disk'] = len( sitesData[siteName]['disk'] )
    sitesData[siteName]['tape'] = len( sitesData[siteName]['tape'] )
  return S_OK( sitesData )
def _checkFilesToStage( seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,
                        checkOnlyTapeSEs = None, jobLog = None,
                        proxyUserName = None, proxyUserGroup = None, executionLock = None ):
  """ Checks on SEs whether the file is NEARLINE or ONLINE
      onlineLFNs, offlineLFNs and absentLFNs are modified to contain the files found online
      If checkOnlyTapeSEs is True, disk replicas are not checked
      As soon as a replica is found Online for a file, no further check is made
  """
  # Only check on storage if it is a tape SE
  if jobLog is None:
    logger = gLogger
  else:
    logger = jobLog
  if checkOnlyTapeSEs is None:
    # Default value is True
    checkOnlyTapeSEs = True

  failed = {}
  for se, lfnsInSEList in seToLFNs.iteritems():
    # If we have already found all files online at another SE, no need to check the others,
    # but we still want to set the SE as Online if it is not a TapeSE
    vo = getVOForGroup( proxyUserGroup )
    seObj = StorageElement( se, vo = vo )
    status = seObj.getStatus()
    if not status['OK']:
      return status
    tapeSE = status['Value']['TapeSE']
    diskSE = status['Value']['DiskSE']
    # If requested to check only Tape SEs and the file is at a diskSE, we guess it is Online...
    filesToCheck = []
    for lfn in lfnsInSEList:
      # If the file had already been found accessible at an SE, only check that this one is on disk
      diskIsOK = checkOnlyTapeSEs or ( lfn in onlineLFNs )
      if diskIsOK and diskSE:
        onlineLFNs.setdefault( lfn, [] ).append( se )
      elif not diskIsOK or ( tapeSE and ( lfn not in onlineLFNs ) ):
        filesToCheck.append( lfn )
    if not filesToCheck:
      continue

    # We have to use a new SE object because it caches the proxy!
    with UserProxy(proxyUserName=proxyUserName, proxyUserGroup=proxyUserGroup, executionLock=executionLock) as proxyResult:
      if proxyResult['OK']:
        fileMetadata = StorageElement(se, vo=vo).getFileMetadata(filesToCheck)
      else:
        fileMetadata = proxyResult

    if not fileMetadata['OK']:
      failed[se] = dict.fromkeys( filesToCheck, fileMetadata['Message'] )
    else:
      if fileMetadata['Value']['Failed']:
        failed[se] = fileMetadata['Value']['Failed']
      # is there at least one replica online?
      for lfn, mDict in fileMetadata['Value']['Successful'].iteritems():
        # SRM returns Cached, but others may only return Accessible
        if mDict.get( 'Cached', mDict['Accessible'] ):
          onlineLFNs.setdefault( lfn, [] ).append( se )
        elif tapeSE:
          # A file can be staged only at a Tape SE
          offlineLFNs.setdefault( lfn, [] ).append( se )
        else:
          # File not available at a diskSE... we shall retry later
          pass

  # Doesn't matter if some files are Offline if they are also Online
  for lfn in set( offlineLFNs ) & set( onlineLFNs ):
    offlineLFNs.pop( lfn )

  # If the file was found staged, ignore possible errors, but print out errors
  for se, failedLfns in failed.items():
    logger.error( "Errors when getting files metadata", 'at %s' % se )
    for lfn, reason in failedLfns.items():
      if lfn in onlineLFNs:
        logger.warn( reason, 'for %s, but there is an online replica' % lfn )
        failed[se].pop( lfn )
      else:
        logger.error( reason, 'for %s, no online replicas' % lfn )
        if cmpError( reason, errno.ENOENT ):
          absentLFNs.setdefault( lfn, [] ).append( se )
          failed[se].pop( lfn )
    if not failed[se]:
      failed.pop( se )

  # Find the files that do not exist at SE
  if failed:
    logger.error( "Error getting metadata",
                  "for %d files" % len( set( lfn for lfnList in failed.itervalues() for lfn in lfnList ) ) )

  # Format the error for absent files
  for lfn in absentLFNs:
    seList = absentLFNs[lfn]
    # FIXME: it is not possible to return here an S_ERROR(), return the message only
    absentLFNs[lfn] = S_ERROR( errno.ENOENT, "File not at %s" % ','.join( seList ) )['Message']
  return S_OK()
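# --- Illustrative sketch (not part of the original module) ---
# The online/offline decision in _checkFilesToStage, replayed on mock metadata.
# SRM backends report 'Cached'; others may only report 'Accessible', hence the
# mDict.get('Cached', mDict['Accessible']) fallback.

mock_metadata = {
  '/vo/data/f1': { 'Cached': True, 'Accessible': True },    # staged, available on the tape buffer
  '/vo/data/f2': { 'Cached': False, 'Accessible': False },  # still on tape only
  '/vo/data/f3': { 'Accessible': True },                    # non-SRM SE, no 'Cached' key
}
onlineLFNs_example, offlineLFNs_example = {}, {}
tapeSE_example = True  # pretend the SE is a tape SE
for lfn, mDict in mock_metadata.items():
  if mDict.get( 'Cached', mDict['Accessible'] ):
    onlineLFNs_example.setdefault( lfn, [] ).append( 'Tape-SE-A' )
  elif tapeSE_example:
    offlineLFNs_example.setdefault( lfn, [] ).append( 'Tape-SE-A' )

print( sorted( onlineLFNs_example ) )   # ['/vo/data/f1', '/vo/data/f3']
print( sorted( offlineLFNs_example ) )  # ['/vo/data/f2']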