def __requestStaging(self, jobState, stageLFNs):
    """Submit the staging request for ``stageLFNs`` through the StorageManagerClient.

    The job status is set before ("Request To Be Sent") and after
    ("Request Sent") the submission, and the returned request ID is stored
    as the "StageRequest" job parameter.

    :param jobState: job state object; ``setStatus``, ``setParameter`` and ``jid`` are used
    :param stageLFNs: mapping handed unchanged to ``StorageManagerClient.setRequest``
    :return: S_OK(stageLFNs) on success; the failed ``setStatus`` result or
             S_ERROR("Problem sending staging request") otherwise
    """
    requestLines = ["%s:%s" % (key, stageLFNs[key]) for key in stageLFNs]
    self.jobLog.verbose("Stage request will be \n\t%s" % "\n\t".join(requestLines))

    stagerClient = StorageManagerClient()

    # Flag the job before the request leaves
    statusResult = jobState.setStatus(
        self.ex_getOption('StagingStatus', 'Staging'),
        self.ex_getOption('StagingMinorStatus', 'Request To Be Sent'),
        appStatus="",
        source=self.ex_optimizerName(),
    )
    if not statusResult['OK']:
        return statusResult

    requestResult = stagerClient.setRequest(
        stageLFNs,
        'WorkloadManagement',
        'updateJobFromStager@WorkloadManagement/JobStateUpdate',
        int(jobState.jid),
    )
    if not requestResult['OK']:
        self.jobLog.error("Could not send stage request: %s" % requestResult['Message'])
        return S_ERROR("Problem sending staging request")

    rid = str(requestResult['Value'])
    self.jobLog.info("Stage request %s sent" % rid)
    jobState.setParameter("StageRequest", rid)

    # Flag the job again now that the request is registered
    statusResult = jobState.setStatus(
        self.ex_getOption('StagingStatus', 'Staging'),
        self.ex_getOption('StagingMinorStatus', 'Request Sent'),
        appStatus="",
        source=self.ex_optimizerName(),
    )
    if statusResult['OK']:
        return S_OK(stageLFNs)
    return statusResult
def main():
    """Report the staging status of one file (LFN) at one storage element.

    Expects two positional arguments, ``<LFN> <SE>``. Prints the cache replica
    record, the IDs of the jobs that requested the file and, when present,
    the stage requests registered for the replica.
    """
    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()
    if len(args) < 2:
        Script.showHelp()
    from DIRAC import exit as DIRACExit, gLogger

    lfn = args[0]
    se = args[1]

    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    client = StorageManagerClient()
    res = client.getCacheReplicas({'LFN': lfn, 'SE': se})
    if not res['OK']:
        gLogger.error(res['Message'])
        # A failed result carries no 'Value'; stop instead of raising KeyError below
        DIRACExit(2)

    cacheReplicaInfo = res['Value']
    if cacheReplicaInfo:
        # Only the first matching replica is reported
        replicaID = list(cacheReplicaInfo)[0]
        replica = cacheReplicaInfo[replicaID]
        outStr = "\n--------------------"
        outStr += "\n%s: %s" % ('LFN'.ljust(8), replica['LFN'].ljust(100))
        outStr += "\n%s: %s" % ('SE'.ljust(8), replica['SE'].ljust(100))
        outStr += "\n%s: %s" % ('PFN'.ljust(8), replica['PFN'].ljust(100))
        outStr += "\n%s: %s" % ('Status'.ljust(8), replica['Status'].ljust(100))
        outStr += "\n%s: %s" % ('LastUpdate'.ljust(8), str(replica['LastUpdate']).ljust(100))
        outStr += "\n%s: %s" % ('Reason'.ljust(8), str(replica['Reason']).ljust(100))

        resTasks = client.getTasks({'ReplicaID': replicaID})
        if resTasks['OK']:
            outStr += '\nJob IDs requesting this file to be staged:'.ljust(8)
            for task in resTasks['Value'].values():
                outStr += ' %s ' % (task['SourceTaskID'])

        resStageRequests = client.getStageRequests({'ReplicaID': replicaID})
        if not resStageRequests['OK']:
            # Failed results have no 'Records' key: report the error and skip the section
            gLogger.error(resStageRequests['Message'])
        elif resStageRequests['Records']:
            outStr += "\n------SRM staging request info--------------"
            for info in resStageRequests['Value'].values():
                outStr += "\n%s: %s" % ('SRM RequestID'.ljust(8), info['RequestID'].ljust(100))
                outStr += "\n%s: %s" % ('SRM StageStatus'.ljust(8), info['StageStatus'].ljust(100))
                outStr += "\n%s: %s" % ('SRM StageRequestSubmitTime'.ljust(8),
                                        str(info['StageRequestSubmitTime']).ljust(100))
                outStr += "\n%s: %s" % ('SRM StageRequestCompletedTime'.ljust(8),
                                        str(info['StageRequestCompletedTime']).ljust(100))
                outStr += "\n%s: %s" % ('SRM PinExpiryTime'.ljust(8), str(info['PinExpiryTime']).ljust(100))
                outStr += "\n%s: %s sec" % ('SRM PinLength'.ljust(8), str(info['PinLength']).ljust(100))
        else:
            outStr += '\nThere are no staging requests submitted to the site yet.'.ljust(8)
    else:
        outStr = "\nThere is no such file requested for staging. Check for typo's!"

    gLogger.notice(outStr)
    DIRACExit(0)
def main():
    """Print the stager task summary of each DIRAC job ID given on the command line.

    Exits with code 2 when an argument is not an integer; jobs for which the
    stager returns an error or no information are reported and skipped.
    """
    Script.parseCommandLine(ignoreErrors=False)
    args = Script.getPositionalArgs()
    if len(args) < 1:
        Script.showHelp()
    from DIRAC import exit as DIRACExit, gLogger

    try:
        jobIDs = [int(arg) for arg in args]
    except ValueError:
        # Only a malformed number is expected here; a wider except would also
        # swallow SystemExit / KeyboardInterrupt
        gLogger.fatal('DIRAC Job IDs must be integers')
        DIRACExit(2)

    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    client = StorageManagerClient()

    for jobID in jobIDs:
        res = client.getTaskSummary(jobID)
        if not res['OK']:
            gLogger.error(res['Message'])
            continue
        if not res['Value']:
            gLogger.notice('No info for job %s, probably gone from the stager...' % jobID)
            continue

        taskInfo = res['Value']['TaskInfo']
        replicaInfo = res['Value']['ReplicaInfo']
        # The task records are looked up by the job ID as a string
        task = taskInfo[str(jobID)]
        outStr = "%s: %s" % ('JobID'.ljust(20), jobID)
        outStr += "\n%s: %s" % ('Status'.ljust(20), task['Status'])
        outStr += "\n%s: %s" % ('SubmitTime'.ljust(20), task['SubmitTime'])
        outStr += "\n%s: %s" % ('CompleteTime'.ljust(20), task['CompleteTime'])
        outStr += "\nStaging files for this job:"
        if not replicaInfo:
            gLogger.notice('No info on files for the job = %s, that is odd' % jobID)
            continue

        for lfn, metadata in replicaInfo.items():
            outStr += "\n\t--------------------"
            outStr += "\n\t%s: %s" % ('LFN'.ljust(8), lfn.ljust(100))
            outStr += "\n\t%s: %s" % ('SE'.ljust(8), metadata['StorageElement'].ljust(100))
            outStr += "\n\t%s: %s" % ('PFN'.ljust(8), str(metadata['PFN']).ljust(100))
            outStr += "\n\t%s: %s" % ('Status'.ljust(8), metadata['Status'].ljust(100))
            outStr += "\n\t%s: %s" % ('Reason'.ljust(8), str(metadata['Reason']).ljust(100))
            outStr += "\n%s: %s" % ('LastUpdate'.ljust(8), str(metadata['LastUpdate']).ljust(100))
            outStr += "\n----------------------"
        gLogger.notice(outStr)
    DIRACExit(0)
def initialize(self):
    """Create the stager client and select the default shifter proxy.

    :return: S_OK()
    """
    self.stagerClient = StorageManagerClient()

    # Default proxy: the one defined under /Operations/Shifter/DataManager.
    # The shifterProxy option in the Configuration can be used to change it.
    self.am_setOption('shifterProxy', 'DataManager')

    return S_OK()
def initialize(self):
    """Create the clients and read the options used by this agent.

    :return: S_OK()
    """
    self.stagerClient = StorageManagerClient()

    # Default proxy: the one defined under /Operations/Shifter/DataManager.
    # The shifterProxy option in the Configuration can be used to change it.
    self.am_setOption("shifterProxy", "DataManager")

    # Value of the "StoragePlugins" agent option; an empty list when it is not set
    self.storagePlugins = self.am_getOption("StoragePlugins", [])
    self.dataOpSender = DataOperationSender()

    return S_OK()
def main():
    """Print the summary of one stage request (task) and of the files attached to it.

    Takes exactly one positional argument, the integer request ID. Exits with
    code 2 when the ID is not an integer or when the summary cannot be fetched.
    """
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument("Request: ID of the Stage request in the StorageManager")
    Script.parseCommandLine(ignoreErrors=False)

    args = Script.getPositionalArgs()
    if len(args) != 1:
        Script.showHelp()

    from DIRAC import exit as DIRACExit, gLogger

    try:
        taskID = int(args[0])
    except Exception:
        gLogger.fatal("Stage requestID must be an integer")
        DIRACExit(2)

    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    client = StorageManagerClient()
    res = client.getTaskSummary(taskID)
    if not res["OK"]:
        gLogger.error(res["Message"])
        DIRACExit(2)

    summary = res["Value"]
    taskInfo = summary["TaskInfo"]
    replicaInfo = summary["ReplicaInfo"]

    # NOTE(review): the task record is looked up with the integer taskID -
    # confirm the service returns integer keys
    chunks = ["%s: %s" % ("TaskID".ljust(20), taskID)]
    taskFields = ("Status", "Source", "SourceTaskID", "CallBackMethod", "SubmitTime", "CompleteTime")
    for field in taskFields:
        chunks.append("\n%s: %s" % (field.ljust(20), taskInfo[taskID][field]))

    # One indented paragraph per file of the request
    for lfn, metadata in replicaInfo.items():
        chunks.append("\n")
        chunks.append("\n\t%s: %s" % ("LFN".ljust(8), lfn.ljust(100)))
        chunks.append("\n\t%s: %s" % ("SE".ljust(8), metadata["StorageElement"].ljust(100)))
        chunks.append("\n\t%s: %s" % ("PFN".ljust(8), str(metadata["PFN"]).ljust(100)))
        chunks.append("\n\t%s: %s" % ("Size".ljust(8), str(metadata["FileSize"]).ljust(100)))
        chunks.append("\n\t%s: %s" % ("Status".ljust(8), metadata["Status"].ljust(100)))
        chunks.append("\n\t%s: %s" % ("Reason".ljust(8), str(metadata["Reason"]).ljust(100)))

    gLogger.notice("".join(chunks))
def initialize(self):
    """Instantiate the catalog, data-management, stager and integrity clients.

    :return: S_OK()
    """
    self.fileCatalog = FileCatalog()
    self.dm = DataManager()
    self.stagerClient = StorageManagerClient()
    self.dataIntegrityClient = DataIntegrityClient()

    # Default proxy: the one defined under /Operations/Shifter/DataManager.
    # The shifterProxy option in the Configuration can be used to change it.
    self.am_setOption("shifterProxy", "DataManager")

    return S_OK()
def initialize(self):
    """Create the stager client and read the pin lifetime option.

    :return: S_OK()
    """
    self.stagerClient = StorageManagerClient()

    # "PinLifetime" agent option; falls back to THROTTLING_TIME when not set
    self.pinLifetime = self.am_getOption("PinLifetime", THROTTLING_TIME)

    # Default proxy: the one defined under /Operations/Shifter/DataManager.
    # The shifterProxy option in the Configuration can be used to change it.
    self.am_setOption("shifterProxy", "DataManager")

    return S_OK()
def main():
    """Print the stager task summary of each DIRAC job ID given on the command line.

    Exits with code 2 when an argument is not an integer; jobs for which the
    stager returns an error or no information are reported and skipped.
    """
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(["JobID: DIRAC Job ID"])
    Script.parseCommandLine(ignoreErrors=False)

    args = Script.getPositionalArgs()
    if len(args) < 1:
        Script.showHelp()

    from DIRAC import exit as DIRACExit, gLogger

    try:
        jobIDs = [int(arg) for arg in args]
    except Exception:
        gLogger.fatal("DIRAC Job IDs must be integers")
        DIRACExit(2)

    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    client = StorageManagerClient()

    for jobID in jobIDs:
        res = client.getTaskSummary(jobID)
        if not res["OK"]:
            gLogger.error(res["Message"])
            continue

        summary = res["Value"]
        if not summary:
            gLogger.notice("No info for job %s, probably gone from the stager..." % jobID)
            continue

        taskInfo = summary["TaskInfo"]
        replicaInfo = summary["ReplicaInfo"]

        # Task records are looked up by the job ID as a string
        taskKey = str(jobID)
        parts = ["%s: %s" % ("JobID".ljust(20), jobID)]
        for field in ("Status", "SubmitTime", "CompleteTime"):
            parts.append("\n%s: %s" % (field.ljust(20), taskInfo[taskKey][field]))
        parts.append("\nStaging files for this job:")

        if not replicaInfo:
            gLogger.notice("No info on files for the job = %s, that is odd" % jobID)
            continue

        for lfn, metadata in replicaInfo.items():
            parts.append("\n\t--------------------")
            parts.append("\n\t%s: %s" % ("LFN".ljust(8), lfn.ljust(100)))
            parts.append("\n\t%s: %s" % ("SE".ljust(8), metadata["StorageElement"].ljust(100)))
            parts.append("\n\t%s: %s" % ("PFN".ljust(8), str(metadata["PFN"]).ljust(100)))
            parts.append("\n\t%s: %s" % ("Status".ljust(8), metadata["Status"].ljust(100)))
            parts.append("\n\t%s: %s" % ("Reason".ljust(8), str(metadata["Reason"]).ljust(100)))
            parts.append("\n%s: %s" % ("LastUpdate".ljust(8), str(metadata["LastUpdate"]).ljust(100)))
            parts.append("\n----------------------")

        gLogger.notice("".join(parts))

    DIRACExit(0)
def main():
    """Print the summary of one stage request (task) and of the files attached to it.

    Takes exactly one positional argument, the integer request ID. Exits with
    code 2 when the ID is not an integer or when the summary cannot be fetched.
    """
    Script.parseCommandLine(ignoreErrors=False)
    args = Script.getPositionalArgs()
    if len(args) != 1:
        Script.showHelp()
    from DIRAC import exit as DIRACExit, gLogger

    try:
        taskID = int(args[0])
    except ValueError:
        # Only a malformed number is expected here; a wider except would also
        # swallow SystemExit / KeyboardInterrupt
        gLogger.fatal('Stage requestID must be an integer')
        DIRACExit(2)

    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    client = StorageManagerClient()
    res = client.getTaskSummary(taskID)
    if not res['OK']:
        gLogger.error(res['Message'])
        DIRACExit(2)

    taskInfo = res['Value']['TaskInfo']
    replicaInfo = res['Value']['ReplicaInfo']

    # NOTE(review): the task record is looked up with the integer taskID -
    # confirm the service returns integer keys
    task = taskInfo[taskID]
    outStr = "%s: %s" % ('TaskID'.ljust(20), taskID)
    outStr += "\n%s: %s" % ('Status'.ljust(20), task['Status'])
    outStr += "\n%s: %s" % ('Source'.ljust(20), task['Source'])
    outStr += "\n%s: %s" % ('SourceTaskID'.ljust(20), task['SourceTaskID'])
    outStr += "\n%s: %s" % ('CallBackMethod'.ljust(20), task['CallBackMethod'])
    outStr += "\n%s: %s" % ('SubmitTime'.ljust(20), task['SubmitTime'])
    outStr += "\n%s: %s" % ('CompleteTime'.ljust(20), task['CompleteTime'])

    # One indented paragraph per file of the request
    for lfn, metadata in replicaInfo.items():
        outStr += "\n"
        outStr += "\n\t%s: %s" % ('LFN'.ljust(8), lfn.ljust(100))
        outStr += "\n\t%s: %s" % ('SE'.ljust(8), metadata['StorageElement'].ljust(100))
        outStr += "\n\t%s: %s" % ('PFN'.ljust(8), str(metadata['PFN']).ljust(100))
        outStr += "\n\t%s: %s" % ('Size'.ljust(8), str(metadata['FileSize']).ljust(100))
        outStr += "\n\t%s: %s" % ('Status'.ljust(8), metadata['Status'].ljust(100))
        outStr += "\n\t%s: %s" % ('Reason'.ljust(8), str(metadata['Reason']).ljust(100))
    gLogger.notice(outStr)
def initialize(self):
    """Instantiate the replica manager, stager and integrity clients and read the pin lifetime.

    :return: S_OK()
    """
    self.replicaManager = ReplicaManager()
    self.stagerClient = StorageManagerClient()
    self.dataIntegrityClient = DataIntegrityClient()

    # "PinLifetime" agent option; falls back to THROTTLING_TIME when not set
    self.pinLifetime = self.am_getOption('PinLifetime', THROTTLING_TIME)

    # Default proxy: the one defined under /Operations/Shifter/DataManager.
    # The shifterProxy option in the Configuration can be used to change it.
    self.am_setOption('shifterProxy', 'DataManager')

    return S_OK()
def main():
    """Submit a stage request for a list of LFNs at one storage element.

    Positional arguments are either ``<file> <SE>``, where ``<file>`` is an
    existing local file listing one LFN per line, or ``<LFN> [<LFN> ...] <SE>``.
    The storage element is always the last argument.
    """
    Script.parseCommandLine(ignoreErrors=True)
    args = Script.getPositionalArgs()
    if len(args) < 2:
        Script.showHelp()

    # The LFNs are everything but the last argument, so the SE has to be the
    # last one (args[1] is only correct when exactly two arguments are given)
    seName = args[-1]
    fileName = args[0]

    import os
    from DIRAC import exit as DIRACExit, gLogger
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    stageLfns = {}

    if os.path.exists(fileName):
        try:
            # 'with' closes the file even when reading fails
            with open(fileName) as lfnFile:
                # Blank lines would otherwise be submitted as empty LFNs
                lfns = [line.strip() for line in lfnFile if line.strip()]
        except (IOError, OSError):
            gLogger.exception('Can not open file', fileName)
            DIRACExit(-1)
    else:
        lfns = args[:-1]

    stageLfns[seName] = lfns
    stagerClient = StorageManagerClient()

    res = stagerClient.setRequest(
        stageLfns,
        'WorkloadManagement',
        'updateJobFromStager@WorkloadManagement/JobStateUpdate',
        0,  # fake JobID = 0
    )
    if not res['OK']:
        gLogger.error(res['Message'])
        DIRACExit(-1)
    else:
        gLogger.notice("Stage request submitted for LFNs:\n %s" % lfns)
        gLogger.notice("SE= %s" % seName)
        gLogger.notice("You can check their status and progress with dirac-stager-monitor-file <LFN> <SE>")

    DIRACExit()
def main():
    """Print a summary of the stager cache replicas and of the submitted stage pins per SE.

    Exits with code 2 when either query to the StorageManager fails, 0 otherwise.
    """
    Script.parseCommandLine(ignoreErrors=False)
    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    client = StorageManagerClient()
    res = client.getCacheReplicasSummary()
    if not res["OK"]:
        gLogger.fatal(res["Message"])
        DIRACExit(2)
    stagerInfo = res["Value"]

    # Table header
    outStr = "\n"
    outStr += " %s" % ("Status".ljust(20))
    outStr += " %s" % ("SE".ljust(20))
    outStr += " %s" % ("NumberOfFiles".ljust(20))
    outStr += " %s" % ("Size(GB)".ljust(20))
    outStr += " \n--------------------------------------------------------------------------\n"
    if stagerInfo:
        for info in stagerInfo.values():
            outStr += " %s" % (info["Status"].ljust(20))
            outStr += " %s" % (info["SE"].ljust(20))
            outStr += " %s" % (str(info["NumFiles"]).ljust(20))
            outStr += " %s\n" % (str(info["SumFiles"]).ljust(20))
    else:
        outStr += " %s" % ("Nothing to see here...Bye")
    outStr += " \nWARNING: the Size for files with Status=New is not yet determined at the point of selection!\n"
    outStr += "--------------------- current status of the SE Caches from the DB-----------"

    res = client.getSubmittedStagePins()
    if not res["OK"]:
        gLogger.fatal(res["Message"])
        DIRACExit(2)
    storageElementUsage = res["Value"]
    if storageElementUsage:
        for storageElement, seDict in storageElementUsage.items():
            # Keep the fractional part: the value is printed with three decimals,
            # so truncating it to an integer first always showed ".000".
            # A local is used so the service reply is not modified.
            totalSizeGB = seDict["TotalSize"] / (1000 * 1000 * 1000.0)
            outStr += " \n %s: %s replicas with a size of %.3f GB." % (
                storageElement.ljust(15),
                str(seDict["Replicas"]).rjust(6),
                totalSizeGB,
            )
    else:
        outStr += " %s" % "\nStageRequest.getStorageUsage: No active stage/pin requests found."
    gLogger.notice(outStr)
    DIRACExit(0)
def __requestStaging(self, jobState, stageLFNs):
    """Submit the staging request for ``stageLFNs`` through the StorageManagerClient.

    The job is set to ``JobStatus.STAGING`` before ("Request To Be Sent") and
    after ("Request Sent") the submission, and the returned request ID is
    stored as the "StageRequest" optimizer parameter.

    :param jobState: job state object; ``setStatus`` and ``jid`` are used
    :param stageLFNs: mapping handed unchanged to ``StorageManagerClient.setRequest``
    :return: S_OK(stageLFNs) on success, otherwise the first failing result
    """
    requestLines = ["%s:%s" % (key, stageLFNs[key]) for key in stageLFNs]
    self.jobLog.debug("Stage request will be \n\t%s" % "\n\t".join(requestLines))

    stagerClient = StorageManagerClient()

    # Flag the job before the request leaves
    statusResult = jobState.setStatus(
        JobStatus.STAGING,
        self.ex_getOption("StagingMinorStatus", "Request To Be Sent"),
        appStatus="",
        source=self.ex_optimizerName(),
    )
    if not statusResult["OK"]:
        return statusResult

    requestResult = stagerClient.setRequest(
        stageLFNs,
        "WorkloadManagement",
        "updateJobFromStager@WorkloadManagement/JobStateUpdate",
        int(jobState.jid),
    )
    if not requestResult["OK"]:
        self.jobLog.error("Could not send stage request", ": %s" % requestResult["Message"])
        return requestResult

    rid = str(requestResult["Value"])
    self.jobLog.info("Stage request sent", "(%s)" % rid)
    self.storeOptimizerParam("StageRequest", rid)

    # Flag the job again now that the request is registered
    statusResult = jobState.setStatus(
        JobStatus.STAGING,
        self.ex_getOption("StagingMinorStatus", "Request Sent"),
        appStatus="",
        source=self.ex_optimizerName(),
    )
    if statusResult["OK"]:
        return S_OK(stageLFNs)
    return statusResult
def __kill_delete_jobs(self, jobIDList, right):
    """Kill or delete the given jobs, depending on their status and on ``right``.

    :param jobIDList: job specification resolved through ``__get_job_list``
    :param right: right under which the job policy is evaluated
    :return: S_OK(validJobList), or S_ERROR carrying 'NonauthorizedJobIDs' /
             'FailedJobIDs' when some jobs could not be processed
    """
    jobList = self.__get_job_list(jobIDList)
    if not jobList:
        return S_ERROR('Invalid job specification: ' + str(jobIDList))

    validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right)

    # Get job status to see what is to be killed or deleted
    result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
    if not result['OK']:
        return result

    killJobList = []
    deleteJobList = []
    markKilledJobList = []
    stagingJobList = []
    for jobID, sDict in result['Value'].items():
        status = sDict['Status']
        if status in ['Running', 'Matched', 'Stalled']:
            killJobList.append(jobID)
        elif status in ['Done', 'Failed']:
            # Finished jobs are only touched when deletion was requested
            if right != RIGHT_KILL:
                deleteJobList.append(jobID)
        else:
            markKilledJobList.append(jobID)
        # Staging jobs additionally get their stager tasks killed
        if status in ['Staging']:
            stagingJobList.append(jobID)

    bad_ids = []
    for jobID in markKilledJobList:
        if not self.__killJob(jobID, sendKillCommand=False)['OK']:
            bad_ids.append(jobID)
    for jobID in killJobList:
        if not self.__killJob(jobID)['OK']:
            bad_ids.append(jobID)
    for jobID in deleteJobList:
        if not self.__deleteJob(jobID)['OK']:
            bad_ids.append(jobID)

    if stagingJobList:
        stagerClient = StorageManagerClient()
        gLogger.info('Going to send killing signal to stager as well!')
        stagerResult = stagerClient.killTasksBySourceTaskID(stagingJobList)
        if not stagerResult['OK']:
            gLogger.warn('Failed to kill some Stager tasks: %s' % stagerResult['Message'])

    if nonauthJobList or bad_ids:
        failure = S_ERROR('Some jobs failed deletion')
        if nonauthJobList:
            failure['NonauthorizedJobIDs'] = nonauthJobList
        if bad_ids:
            failure['FailedJobIDs'] = bad_ids
        return failure

    success = S_OK(validJobList)
    success['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    if invalidJobList:
        success['InvalidJobIDs'] = invalidJobList
    return success
def requestStage(self, jobState, candidates, lfnData):
    """Pick a staging site among ``candidates`` and request the staging of the tape replicas there.

    Files that have a readable disk replica at the selected site are skipped.
    When a file has replicas on several tape SEs of the site, only the replica
    on the SE holding the most files to stage is kept in the request.

    :param jobState: job state object; ``jid``, ``setParameter`` and ``setStatus`` are used
    :param candidates: set of candidate site names
    :param dict lfnData: per-LFN data; ``lfnData[lfn]['Replicas'][seName]['SURL']`` is read
    :return: S_OK(set of sites) / S_ERROR
    """
    # Any site is as good as any so random time!
    # random.sample() does not accept a set on Python >= 3.11, hence the list
    stageSite = random.sample(list(candidates), 1)[0]
    self.jobLog.info("Site selected %s for staging" % stageSite)
    result = getSEsForSite(stageSite)
    if not result['OK']:
        return S_ERROR('Could not determine SEs for site %s' % stageSite)
    siteSEs = result['Value']

    # Split the readable SEs of the site into tape and disk ones
    tapeSEs = []
    diskSEs = []
    for seName in siteSEs:
        result = self.__getSEStatus(seName)
        if not result['OK']:
            self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result['Message']))
            return S_ERROR("Cannot retrieve SE status")
        seStatus = result['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            tapeSEs.append(seName)
        if seStatus['Read'] and seStatus['DiskSE']:
            diskSEs.append(seName)

    if not tapeSEs:
        return S_ERROR("No Local SEs for site %s" % stageSite)

    self.jobLog.verbose("Tape SEs are %s" % (", ".join(tapeSEs)))

    stageLFNs = {}
    lfnToStage = []
    for lfn in lfnData:
        replicas = lfnData[lfn]['Replicas']
        # Check SEs
        seStage = []
        for seName in replicas:
            # Lookup kept from the original; raises KeyError when the replica has no 'SURL' entry
            _surl = replicas[seName]['SURL']
            if seName in diskSEs:
                # This lfn is in disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append(seName)
        for seName in seStage:
            stageLFNs.setdefault(seName, []).append(lfn)
            if lfn not in lfnToStage:
                lfnToStage.append(lfn)

    if not stageLFNs:
        return S_ERROR("Cannot find tape replicas")

    # Check if any LFN is in more than one SE
    # If that's the case, try to stage from the SE that has more LFNs to stage to group the request
    # 1.- Get the SEs ordered by descending number of LFNs. This has to be a
    #     list: it is traversed once per LFN, and a reversed() iterator would
    #     be exhausted after the first LFN
    sortedSEs = sorted([(len(stageLFNs[seName]), seName) for seName in stageLFNs], reverse=True)
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            seLFNs = stageLFNs.get(seName)
            if seLFNs is None:
                # This SE was emptied and removed while handling a previous LFN
                continue
            if lfn in seLFNs:
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    seLFNs.remove(lfn)
                else:
                    found = True
            # 4.- If empty SE, remove
            if not seLFNs:
                stageLFNs.pop(seName)

    self.jobLog.info(
        "Stage request will be \n\t%s" % "\n\t".join(["%s:%s" % (seName, stageLFNs[seName]) for seName in stageLFNs])
    )

    stagerClient = StorageManagerClient()
    result = stagerClient.setRequest(
        stageLFNs,
        'WorkloadManagement',
        'stageCallback@WorkloadManagement/OptimizationMind',
        int(jobState.jid),
    )
    if not result['OK']:
        self.jobLog.error("Could not send stage request: %s" % result['Message'])
        return S_ERROR("Problem sending staging request")

    rid = str(result['Value'])
    self.jobLog.info("Stage request %s sent" % rid)
    jobState.setParameter("StageRequest", rid)
    result = jobState.setStatus(
        self.ex_getOption('StagingStatus', 'Staging'),
        self.ex_getOption('StagingMinorStatus', 'Request Sent'),
        appStatus="",
        source=self.ex_optimizerName(),
    )
    if not result['OK']:
        return result

    # Candidates that can reach every SE of the request, plus the chosen site
    stageCandidates = []
    for seName in stageLFNs:
        result = self.__getSitesForSE(seName)
        if result['OK']:
            stageCandidates.append(result['Value'])

    stageCandidates = candidates.intersection(*stageCandidates).union([stageSite])
    return S_OK(stageCandidates)
stageLfns = {} if os.path.exists(fileName): try: lfnFile = open(fileName) lfns = [k.strip() for k in lfnFile.readlines()] lfnFile.close() except Exception: gLogger.exception('Can not open file', fileName) DIRACExit(-1) else: lfns = args[:len(args) - 1] stageLfns[seName] = lfns stagerClient = StorageManagerClient() res = stagerClient.setRequest(stageLfns, 'WorkloadManagement', 'updateJobFromStager@WorkloadManagement/JobStateUpdate', 0) # fake JobID = 0 if not res['OK']: gLogger.error(res['Message']) DIRACExit(-1) else: gLogger.notice("Stage request submitted for LFNs:\n %s" % lfns) gLogger.notice("SE= %s" % seName) gLogger.notice("You can check their status and progress with dirac-stager-monitor-file <LFN> <SE>") '''Example1: dirac-stager-stage-files.py filesToStage.txt GRIDKA-RDST Stage request submitted for LFNs:
def __kill_delete_jobs(self, jobIDList, right):
    """Kill or delete the given jobs, depending on their status and on ``right``.

    :param list jobIDList: job IDs
    :param str right: right under which the job policy is evaluated

    :return: S_OK(validJobList), or S_ERROR carrying "NonauthorizedJobIDs" /
             "FailedJobIDs" when some jobs could not be processed
    """
    jobList = self.__getJobList(jobIDList)
    if not jobList:
        return S_ERROR("Invalid job specification: " + str(jobIDList))

    validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right)

    badIDs = []

    if validJobList:
        # Get job status to see what is to be killed or deleted
        result = self.jobDB.getJobsAttributes(validJobList, ["Status"])
        if not result["OK"]:
            return result

        killJobList = []
        deleteJobList = []
        markKilledJobList = []
        stagingJobList = []
        for jobID, sDict in result["Value"].items():  # can be an iterator
            status = sDict["Status"]
            if status in (JobStatus.RUNNING, JobStatus.MATCHED, JobStatus.STALLED):
                killJobList.append(jobID)
            elif status in (
                JobStatus.SUBMITTING,
                JobStatus.RECEIVED,
                JobStatus.CHECKING,
                JobStatus.WAITING,
                JobStatus.RESCHEDULED,
                JobStatus.DONE,
                JobStatus.FAILED,
                JobStatus.KILLED,
            ):
                # These jobs are only touched when deletion was requested
                if right != RIGHT_KILL:
                    deleteJobList.append(jobID)
            else:
                markKilledJobList.append(jobID)
            # Staging jobs additionally get their stager tasks killed
            if status in [JobStatus.STAGING]:
                stagingJobList.append(jobID)

        for jobID in markKilledJobList:
            if not self.__killJob(jobID, sendKillCommand=False)["OK"]:
                badIDs.append(jobID)

        for jobID in killJobList:
            if not self.__killJob(jobID)["OK"]:
                badIDs.append(jobID)

        for jobID in deleteJobList:
            if not self.__deleteJob(jobID)["OK"]:
                badIDs.append(jobID)

        if stagingJobList:
            stagerClient = StorageManagerClient()
            self.log.info("Going to send killing signal to stager as well!")
            stagerResult = stagerClient.killTasksBySourceTaskID(stagingJobList)
            if not stagerResult["OK"]:
                self.log.warn("Failed to kill some Stager tasks", stagerResult["Message"])

    if nonauthJobList or badIDs:
        failure = S_ERROR("Some jobs failed deletion")
        if nonauthJobList:
            self.log.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList))
            failure["NonauthorizedJobIDs"] = nonauthJobList
        if badIDs:
            self.log.warn("JobIDs failed to be deleted", str(badIDs))
            failure["FailedJobIDs"] = badIDs
        return failure

    success = S_OK(validJobList)
    success["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
    if invalidJobList:
        success["InvalidJobIDs"] = invalidJobList
    return success
def __setStagingRequest(self, job, destination, inputDataDict):
    """Formulate a staging request and save its ID as the 'StageRequest' job parameter.

    Files with a readable disk replica at ``destination`` are left out. When a
    file has replicas on several tape SEs of the site, only the replica on the
    SE holding the most files to stage is kept.

    :param job: job ID; passed as is to the stager and as ``int(job)`` to the JobDB
    :param str destination: site whose storage elements are considered
    :param dict inputDataDict: replica lookup result; the replicas are read from
                               ``inputDataDict['Value']['Value']['Successful']``
    :return: S_OK(stageLfns), a dict of SE name -> list of LFNs, or S_ERROR
    """
    self.log.verbose('Destination site %s' % (destination))
    self.log.verbose('Input Data: %s' % (inputDataDict))

    destinationSEs = getSEsForSite(destination)
    if not destinationSEs['OK']:
        return S_ERROR('Could not determine SEs for site %s' % destination)
    destinationSEs = destinationSEs['Value']

    # Split the readable SEs of the site into tape and disk ones
    siteTapeSEs = []
    siteDiskSEs = []
    for se in destinationSEs:
        statusResult = StorageElement(se).getStatus()
        if not statusResult['OK']:
            # A failed result has no 'Value'; report it instead of raising KeyError
            self.log.error('Cannot retrieve SE %s status: %s' % (se, statusResult['Message']))
            return S_ERROR('Cannot retrieve SE status')
        seStatus = statusResult['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            siteTapeSEs.append(se)
        if seStatus['Read'] and seStatus['DiskSE']:
            siteDiskSEs.append(se)

    if not siteTapeSEs:
        return S_ERROR('No LocalSEs For Site')

    self.log.verbose('Site tape SEs: %s' % (', '.join(siteTapeSEs)))

    stageLfns = {}
    lfnsToStage = []
    inputData = inputDataDict['Value']['Value']['Successful']
    for lfn, reps in inputData.items():
        lfnTapeSEs = []
        for se in reps:
            if se in siteDiskSEs:
                # This file is on disk: ignore it altogether, whatever the
                # order in which its replicas are visited
                lfnTapeSEs = []
                break
            if se not in siteTapeSEs:
                # This replica is not at a tape SE of the site
                continue
            lfnTapeSEs.append(se)
        for se in lfnTapeSEs:
            # setdefault() replaces the Python-2-only dict.has_key()
            stageLfns.setdefault(se, []).append(lfn)
        if lfnTapeSEs:
            lfnsToStage.append(lfn)

    # Now we need to check if any LFN is in more than one SE
    if len(stageLfns) > 1:
        # SEs with the most files first, so that the requests get grouped
        orderedSEs = [se for _count, se in sorted([(len(stageLfns[se]), se) for se in stageLfns], reverse=True)]
        for lfn in lfnsToStage:
            lfnFound = False
            for se in orderedSEs:
                if lfn in stageLfns[se]:
                    if lfnFound:
                        stageLfns[se].remove(lfn)
                    else:
                        lfnFound = True

    stagerClient = StorageManagerClient()
    request = stagerClient.setRequest(
        stageLfns,
        'WorkloadManagement',
        'updateJobFromStager@WorkloadManagement/JobStateUpdate',
        job,
    )
    if not request['OK']:
        self.log.error('Problem sending Staging request:')
        self.log.error(request)
        return S_ERROR('Error Sending Staging Request')

    self.jobDB.setJobParameter(int(job), 'StageRequest', str(request['Value']))
    self.log.info('Staging request successfully sent')

    result = self.updateJobStatus(job, self.stagingStatus, self.stagingMinorStatus, "Unknown")
    if not result['OK']:
        return result
    return S_OK(stageLfns)
def run():
    """Print the cache replicas selected by the command-line switches, one per line.

    Reads the module-level ``switchDict``: 'status' and 'se' filter the query,
    'limit' caps the number of entries, and 'showJobs' adds a column with the
    IDs of the jobs that requested each replica. A placeholder is written in
    that column whenever no job can be listed, so the columns stay aligned.
    """
    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    client = StorageManagerClient()
    queryDict = {}

    if 'status' in switchDict:
        queryDict['Status'] = str(switchDict['status'])

    if 'se' in switchDict:
        queryDict['SE'] = str(switchDict['se'])

    # weird: if there are no switches (dictionary is empty), then the --limit is ignored!!
    # must FIX that in StorageManagementDB.py!
    # ugly fix:
    newer = '1903-08-02 06:24:38'  # select newer than
    if 'limit' in switchDict:
        gLogger.notice("Query limited to %s entries" % switchDict['limit'])
        res = client.getCacheReplicas(queryDict, None, newer, None, None, int(switchDict['limit']))
    else:
        res = client.getCacheReplicas(queryDict)

    if not res['OK']:
        gLogger.error(res['Message'])
        # A failed result has neither 'Records' nor 'Value' to display
        return
    if not res['Records']:
        gLogger.notice("No entries")
        return

    showJobs = 'showJobs' in switchDict
    replicas = res['Value']

    # Table header
    outStr = "\n"
    outStr += " %s" % ("Status".ljust(15))
    outStr += " %s" % ("LastUpdate".ljust(20))
    outStr += " %s" % ("LFN".ljust(80))
    outStr += " %s" % ("SE".ljust(10))
    outStr += " %s" % ("Reason".ljust(10))
    if showJobs:
        outStr += " %s" % ("Jobs".ljust(10))
    outStr += " %s" % ("PinExpiryTime".ljust(15))
    outStr += " %s" % ("PinLength(sec)".ljust(15))
    outStr += "\n"

    # items()/values() work on both Python 2 and 3, unlike iteritems()/itervalues()
    for crid, info in replicas.items():
        outStr += " %s" % (info['Status'].ljust(15))
        outStr += " %s" % (str(info['LastUpdate']).ljust(20))
        outStr += " %s" % (info['LFN'].ljust(30))
        outStr += " %s" % (info['SE'].ljust(15))
        outStr += " %s" % (str(info['Reason']).ljust(10))

        # Task info
        if showJobs:
            resTasks = client.getTasks({'ReplicaID': crid})
            jobs = []
            if resTasks['OK'] and resTasks['Value']:
                jobs = [task['SourceTaskID'] for task in resTasks['Value'].values()]
            if jobs:
                outStr += ' %s ' % (str(jobs).ljust(10))
            else:
                outStr += ' %s ' % (" --- ".ljust(10))

        # Stage request info
        # what if there's no request to the site yet?
        resStageRequests = client.getStageRequests({'ReplicaID': crid})
        if not resStageRequests['OK']:
            # Failed results have no 'Records' key: report and leave the columns empty
            gLogger.error(resStageRequests['Message'])
        elif resStageRequests['Records']:
            for request in resStageRequests['Value'].values():
                outStr += " %s" % (str(request['PinExpiryTime']).ljust(20))
                outStr += " %s" % (str(request['PinLength']).ljust(10))
        outStr += "\n"

    gLogger.notice(outStr)
def __requestStaging(self, jobState, stageSite, opData):
    """Request the staging of the tape replicas available at ``stageSite``.

    Files that have a readable disk replica at the site are skipped. When a
    file has replicas on several tape SEs of the site, only the replica on the
    SE holding the most files to stage is kept in the request.

    :param jobState: job state object; ``jid``, ``setParameter`` and ``setStatus`` are used
    :param str stageSite: site whose storage elements are considered
    :param dict opData: replica lookup result; the replicas are read from
                        ``opData['Value']['Value']['Successful']``
    :return: S_OK(stageLFNs), a dict of SE name -> list of LFNs, or S_ERROR
    """
    result = getSEsForSite(stageSite)
    if not result['OK']:
        return S_ERROR('Could not determine SEs for site %s' % stageSite)
    siteSEs = result['Value']

    # Split the readable SEs of the site into tape and disk ones
    tapeSEs = []
    diskSEs = []
    for seName in siteSEs:
        se = StorageElement(seName)
        result = se.getStatus()
        if not result['OK']:
            self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result['Message']))
            return S_ERROR("Cannot retrieve SE status")
        seStatus = result['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            tapeSEs.append(seName)
        if seStatus['Read'] and seStatus['DiskSE']:
            diskSEs.append(seName)

    if not tapeSEs:
        return S_ERROR("No Local SEs for site %s" % stageSite)

    self.jobLog.verbose("Tape SEs are %s" % (", ".join(tapeSEs)))

    # I swear this is horrible DM code it's not mine.
    # Eternity of hell to the inventor of the Value of Value of Success of...
    inputData = opData['Value']['Value']['Successful']

    stageLFNs = {}
    lfnToStage = []
    for lfn in inputData:
        replicas = inputData[lfn]
        # Check SEs
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is in disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append(seName)
        for seName in seStage:
            stageLFNs.setdefault(seName, []).append(lfn)
            if lfn not in lfnToStage:
                lfnToStage.append(lfn)

    if not stageLFNs:
        return S_ERROR("Cannot find tape replicas")

    # Check if any LFN is in more than one SE
    # If that's the case, try to stage from the SE that has more LFNs to stage to group the request
    # 1.- Get the SEs ordered by descending number of LFNs. This has to be a
    #     list: it is traversed once per LFN, and a reversed() iterator would
    #     be exhausted after the first LFN
    sortedSEs = sorted([(len(stageLFNs[seName]), seName) for seName in stageLFNs], reverse=True)
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            seLFNs = stageLFNs.get(seName)
            if seLFNs is None:
                # This SE was emptied and removed while handling a previous LFN
                continue
            if lfn in seLFNs:
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    seLFNs.remove(lfn)
                else:
                    found = True
            # 4.- If empty SE, remove
            if not seLFNs:
                stageLFNs.pop(seName)

    self.jobLog.verbose(
        "Stage request will be \n\t%s" % "\n\t".join(["%s:%s" % (seName, stageLFNs[seName]) for seName in stageLFNs])
    )

    stagerClient = StorageManagerClient()
    result = stagerClient.setRequest(
        stageLFNs,
        'WorkloadManagement',
        'updateJobFromStager@WorkloadManagement/JobStateUpdate',
        int(jobState.jid),
    )
    if not result['OK']:
        self.jobLog.error("Could not send stage request: %s" % result['Message'])
        return S_ERROR("Problem sending staging request")

    rid = str(result['Value'])
    self.jobLog.info("Stage request %s sent" % rid)
    jobState.setParameter("StageRequest", rid)
    result = jobState.setStatus(
        self.ex_getOption('StagingStatus', 'Staging'),
        self.ex_getOption('StagingMinorStatus', 'Request Sent'),
        appStatus="",
        source=self.ex_optimizerName(),
    )
    if not result['OK']:
        return result
    return S_OK(stageLFNs)
def run():
    """
    Print a table of the cache replicas known to the StorageManager.

    The query is built from the module-level ``switchDict``:

    * ``status`` and ``se`` restrict the selection,
    * ``limit`` caps the number of entries,
    * ``showJobs`` adds a column with the IDs of the tasks requesting each replica.

    For every replica the pin expiry time and pin length of its stage requests
    are appended when such requests exist. A failed query is logged and the
    corresponding output is skipped.
    """
    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    client = StorageManagerClient()
    queryDict = {}

    if "status" in switchDict:
        queryDict["Status"] = str(switchDict["status"])

    if "se" in switchDict:
        queryDict["SE"] = str(switchDict["se"])

    # weird: if there are no switches (dictionary is empty), then the --limit is ignored!!
    # must FIX that in StorageManagementDB.py!
    # ugly fix:
    newer = "1903-08-02 06:24:38"  # select newer than
    if "limit" in switchDict:
        gLogger.notice("Query limited to %s entries" % switchDict["limit"])
        res = client.getCacheReplicas(queryDict, None, newer, None, None, int(switchDict["limit"]))
    else:
        res = client.getCacheReplicas(queryDict)

    if not res["OK"]:
        # A failed result is not indexed any further; just report and stop
        gLogger.error(res["Message"])
        return

    if not res["Records"]:
        gLogger.notice("No entries")
        return

    showJobs = "showJobs" in switchDict

    # Header line
    columns = [("Status", 15), ("LastUpdate", 20), ("LFN", 80), ("SE", 10), ("Reason", 10)]
    if showJobs:
        columns.append(("Jobs", 10))
    columns.extend([("PinExpiryTime", 15), ("PinLength(sec)", 15)])

    parts = ["\n"]
    parts.extend(" %s" % title.ljust(width) for title, width in columns)
    parts.append("\n")

    # One line per cache replica
    for crid, info in res["Value"].items():
        parts.append(" %s" % (info["Status"].ljust(15)))
        parts.append(" %s" % (str(info["LastUpdate"]).ljust(20)))
        parts.append(" %s" % (info["LFN"].ljust(30)))
        parts.append(" %s" % (info["SE"].ljust(15)))
        parts.append(" %s" % (str(info["Reason"]).ljust(10)))

        # Task info
        if showJobs:
            resTasks = client.getTasks({"ReplicaID": crid})
            if resTasks["OK"]:
                tasks = resTasks["Value"]
                if tasks:
                    jobs = [tasks[tid]["SourceTaskID"] for tid in tasks]
                    parts.append(" %s " % (str(jobs).ljust(10)))
            else:
                parts.append(" %s " % (" --- ".ljust(10)))

        # Stage request info
        # what if there's no request to the site yet?
        resStageRequests = client.getStageRequests({"ReplicaID": crid})
        if not resStageRequests["OK"]:
            # Leave the pin columns empty for this replica instead of crashing
            gLogger.error(resStageRequests["Message"])
        elif resStageRequests["Records"]:
            for requestInfo in resStageRequests["Value"].values():
                parts.append(" %s" % (str(requestInfo["PinExpiryTime"]).ljust(20)))
                parts.append(" %s" % (str(requestInfo["PinLength"]).ljust(10)))
        parts.append("\n")

    gLogger.notice("".join(parts))
def __kill_delete_jobs(self, jobIDList, right):
    """
    Kill, delete or mark as killed the given jobs, depending on their status.

    Jobs that are Running, Matched or Stalled are killed. Jobs that are Done,
    Failed or Killed are deleted, unless ``right`` is ``RIGHT_KILL``, in which
    case they are left alone. Jobs in any other status are marked as killed
    without sending a kill command. Jobs in ``Staging`` status additionally
    have their stager tasks killed.

    :param list jobIDList: job IDs
    :param str right: right
    :return: S_OK()/S_ERROR()
    """
    requestedJobs = self.__getJobList(jobIDList)
    if not requestedJobs:
        return S_ERROR('Invalid job specification: ' + str(jobIDList))

    rights = self.jobPolicy.evaluateJobRights(requestedJobs, right)
    validJobList, invalidJobList, nonauthJobList, ownerJobList = rights

    # Get job status to see what is to be killed or deleted
    statusResult = self.jobDB.getAttributesForJobList(validJobList, ['Status'])
    if not statusResult['OK']:
        return statusResult

    activeStates = (JobStatus.RUNNING, JobStatus.MATCHED, JobStatus.STALLED)
    finalStates = (JobStatus.DONE, JobStatus.FAILED, JobStatus.KILLED)

    toKill = []
    toDelete = []
    toMarkKilled = []
    inStaging = []
    for jobID, attributes in statusResult['Value'].items():  # can be an iterator
        status = attributes['Status']
        if status in activeStates:
            toKill.append(jobID)
        elif status in finalStates:
            if right != RIGHT_KILL:
                toDelete.append(jobID)
        else:
            toMarkKilled.append(jobID)
        if status == 'Staging':
            inStaging.append(jobID)

    # Same order as always: mark as killed, then kill, then delete
    failedJobs = [jobID for jobID in toMarkKilled
                  if not self.__killJob(jobID, sendKillCommand=False)['OK']]
    failedJobs += [jobID for jobID in toKill if not self.__killJob(jobID)['OK']]
    failedJobs += [jobID for jobID in toDelete if not self.__deleteJob(jobID)['OK']]

    if inStaging:
        stagerClient = StorageManagerClient()
        self.log.info('Going to send killing signal to stager as well!')
        stagerResult = stagerClient.killTasksBySourceTaskID(inStaging)
        if not stagerResult['OK']:
            self.log.warn('Failed to kill some Stager tasks', stagerResult['Message'])

    if nonauthJobList or failedJobs:
        errorResult = S_ERROR('Some jobs failed deletion')
        if nonauthJobList:
            self.log.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList))
            errorResult['NonauthorizedJobIDs'] = nonauthJobList
        if failedJobs:
            self.log.warn("JobIDs failed to be deleted", str(failedJobs))
            errorResult['FailedJobIDs'] = failedJobs
        return errorResult

    okResult = S_OK(validJobList)
    if len(ownerJobList) > 0:
        okResult['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
    else:
        okResult['requireProxyUpload'] = False

    if invalidJobList:
        okResult['InvalidJobIDs'] = invalidJobList

    return okResult
def main():
    """
    Report the staging status of one file on one storage element.

    Takes two positional arguments, the LFN and the SE, looks the replica up in
    the StorageManager cache and prints its details, the IDs of the tasks that
    requested it, and the SRM staging request information when available.

    If the cache replica query fails, the error is logged and the script exits
    with code 1. If the stage request query fails, the error is logged and that
    section is left out of the report.
    """
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument("LFN: LFN of the staging file")
    Script.registerArgument("SE: Storage Element for the staging file")
    Script.parseCommandLine(ignoreErrors=True)

    from DIRAC import exit as DIRACExit, gLogger

    lfn, se = Script.getPositionalArgs(group=True)

    from DIRAC.StorageManagementSystem.Client.StorageManagerClient import StorageManagerClient

    client = StorageManagerClient()
    res = client.getCacheReplicas({"LFN": lfn, "SE": se})
    if not res["OK"]:
        # A failed result is not indexed any further; report and stop
        gLogger.error(res["Message"])
        DIRACExit(1)

    cacheReplicaInfo = res["Value"]
    if cacheReplicaInfo:
        replicaID = list(cacheReplicaInfo)[0]
        replica = cacheReplicaInfo[replicaID]
        outStr = "\n--------------------"
        outStr += "\n%s: %s" % ("LFN".ljust(8), replica["LFN"].ljust(100))
        outStr += "\n%s: %s" % ("SE".ljust(8), replica["SE"].ljust(100))
        outStr += "\n%s: %s" % ("PFN".ljust(8), replica["PFN"].ljust(100))
        outStr += "\n%s: %s" % ("Status".ljust(8), replica["Status"].ljust(100))
        outStr += "\n%s: %s" % ("LastUpdate".ljust(8), str(replica["LastUpdate"]).ljust(100))
        outStr += "\n%s: %s" % ("Reason".ljust(8), str(replica["Reason"]).ljust(100))

        resTasks = client.getTasks({"ReplicaID": replicaID})
        if resTasks["OK"]:
            outStr += "\nJob IDs requesting this file to be staged:"
            tasks = resTasks["Value"]
            for tid in tasks:
                outStr += " %s " % (tasks[tid]["SourceTaskID"])

        resStageRequests = client.getStageRequests({"ReplicaID": replicaID})
        if not resStageRequests["OK"]:
            # Print what is known so far rather than crashing on the failed result
            gLogger.error(resStageRequests["Message"])
        elif resStageRequests["Records"]:
            stageRequests = resStageRequests["Value"]
            outStr += "\n------SRM staging request info--------------"
            for info in stageRequests.values():
                outStr += "\n%s: %s" % ("SRM RequestID".ljust(8), info["RequestID"].ljust(100))
                outStr += "\n%s: %s" % ("SRM StageStatus".ljust(8), info["StageStatus"].ljust(100))
                outStr += "\n%s: %s" % (
                    "SRM StageRequestSubmitTime".ljust(8),
                    str(info["StageRequestSubmitTime"]).ljust(100),
                )
                outStr += "\n%s: %s" % (
                    "SRM StageRequestCompletedTime".ljust(8),
                    str(info["StageRequestCompletedTime"]).ljust(100),
                )
                outStr += "\n%s: %s" % ("SRM PinExpiryTime".ljust(8), str(info["PinExpiryTime"]).ljust(100))
                outStr += "\n%s: %s sec" % ("SRM PinLength".ljust(8), str(info["PinLength"]).ljust(100))
        else:
            outStr += "\nThere are no staging requests submitted to the site yet."
    else:
        outStr = "\nThere is no such file requested for staging. Check for typo's!"
        # Script.showHelp()
    gLogger.notice(outStr)

    DIRACExit(0)