result = getVOfromProxyGroup() if not result['OK']: gLogger.notice('Error:', result['Message']) DIRAC.exit(1) vo = result['Value'] resources = Resources(vo=vo) result = resources.getEligibleStorageElements() if not result['OK']: gLogger.notice('Error:', result['Message']) DIRAC.exit(2) seList = sortList(result['Value']) resourceStatus = ResourceStatus() result = resourceStatus.getStorageStatus(seList) if not result['OK']: gLogger.notice('Error:', result['Message']) DIRAC.exit(3) for k, v in result['Value'].items(): readState, writeState = 'Active', 'Active' if v.has_key('ReadAccess'): readState = v['ReadAccess'] if v.has_key('WriteAccess'): writeState = v['WriteAccess'] gLogger.notice( "%s %s %s" %
class StrategyHandler(object):
    """
    .. class:: StrategyHandler

    StrategyHandler is a helper class for determining optimal replication tree for given
    source files, their replicas and target storage elements.

    The handler keeps an FTS graph (sites as nodes, FTS channels as edges) and offers
    four strategies ('Simple', 'Swarm', 'MinimiseTotalWait', 'DynamicThroughput') which
    all return a replication tree of the form::

      { channelID : { "Ancestor" : False or channelID, "SourceSE" : str,
                      "DestSE" : str, "Strategy" : str } }
    """

    def __init__(self, configSection, channels=None, bandwidths=None, failedFiles=None):
        """c'tor

        :param self: self reference
        :param str configSection: path on CS to ReplicationScheduler agent
        :param dict channels: active channels, see :setup:
        :param dict bandwidths: observed throughput on active channels, see :setup:
        :param dict failedFiles: distinct failed files per channel { channelID : int }
        :raises SHGraphCreationError: when the FTS graph cannot be created
        """
        ## save config section
        self.configSection = configSection + "/" + self.__class__.__name__
        ## sublogger
        self.log = gLogger.getSubLogger("StrategyHandler", child=True)
        self.log.setLevel(gConfig.getValue(self.configSection + "/LogLevel", "DEBUG"))

        self.supportedStrategies = ['Simple', 'DynamicThroughput', 'Swarm', 'MinimiseTotalWait']
        self.log.info("Supported strategies = %s" % ", ".join(self.supportedStrategies))

        ## penalty added to timeToStart for hops that do not start at a primary source
        self.sigma = gConfig.getValue(self.configSection + '/HopSigma', 0.0)
        self.log.info("HopSigma = %s" % self.sigma)

        self.schedulingType = gConfig.getValue(self.configSection + '/SchedulingType', 'File')
        self.log.info("SchedulingType = %s" % self.schedulingType)

        self.activeStrategies = gConfig.getValue(self.configSection + '/ActiveStrategies',
                                                 ['MinimiseTotalWait'])
        self.log.info("ActiveStrategies = %s" % ", ".join(self.activeStrategies))

        self.numberOfStrategies = len(self.activeStrategies)
        self.log.info("Number of active strategies = %s" % self.numberOfStrategies)

        self.acceptableFailureRate = gConfig.getValue(self.configSection + '/AcceptableFailureRate', 75)
        self.log.info("AcceptableFailureRate = %s" % self.acceptableFailureRate)

        self.acceptableFailedFiles = gConfig.getValue(self.configSection + "/AcceptableFailedFiles", 5)
        self.log.info("AcceptableFailedFiles = %s" % self.acceptableFailedFiles)

        ## read as seconds from CS, kept as timedelta afterwards
        self.rwUpdatePeriod = gConfig.getValue(self.configSection + "/RssRWUpdatePeriod", 600)
        self.log.info("RSSUpdatePeriod = %s s" % self.rwUpdatePeriod)
        self.rwUpdatePeriod = datetime.timedelta(seconds=self.rwUpdatePeriod)

        ## bandwidths
        self.bandwidths = bandwidths if bandwidths else {}
        ## channels
        self.channels = channels if channels else {}
        ## distinct failed files per channel
        self.failedFiles = failedFiles if failedFiles else {}
        ## index of the next strategy returned by __selectStrategy
        self.chosenStrategy = 0
        ## fts graph, filled in by setup()
        self.ftsGraph = None
        ## timestamp for last update
        self.lastRssUpdate = datetime.datetime.now()
        ## dispatcher
        self.strategyDispatcher = {"MinimiseTotalWait": self.minimiseTotalWait,
                                   "DynamicThroughput": self.dynamicThroughput,
                                   "Simple": self.simple,
                                   "Swarm": self.swarm}
        ## own RSS client
        self.resourceStatus = ResourceStatus()
        ## create fts graph
        ftsGraph = self.setup(self.channels, self.bandwidths, self.failedFiles)
        if not ftsGraph["OK"]:
            raise SHGraphCreationError(ftsGraph["Message"])
        self.log.info("%s has been constructed" % self.__class__.__name__)

    def setup(self, channels, bandwithds, failedFiles):
        """ prepare fts graph

        :param dict channels: { channelID : { "Files" : long , "Size" : long, "ChannelName" : str,
                                              "Source" : str, "Destination" : str, "Status" : str } }
        :param dict bandwithds: { channelID : { "Throughput" : float, "Fileput" : float,
                                                "SuccessfulFiles" : long, "FailedFiles" : long } }
        :param dict failedFiles: { channelID : int }
        :return: S_OK() on success (graph stored in self.ftsGraph), or the failing
                 S_ERROR from the site mapping / RSS query
        """
        graph = FTSGraph("sites")

        result = getStorageElementSiteMapping()
        if not result['OK']:
            return result
        sitesDict = result['Value']

        ## create nodes, one per site
        for site, ses in sitesDict.items():
            rwDict = self.__getRWAccessForSE(ses)
            if not rwDict["OK"]:
                return rwDict
            ## for dotted site names the second component is used as the node name
            siteName = site
            if '.' in site:
                siteName = site.split('.')[1]
            graph.addNode(LCGSite(siteName, {"SEs": rwDict["Value"]}))

        ## create edges, only for channels whose both ends are known nodes
        for channelID, channelDict in channels.items():
            fromNode = graph.getNode(channelDict["Source"])
            toNode = graph.getNode(channelDict["Destination"])
            if fromNode and toNode:
                channelBandwidth = bandwithds[channelID]
                rwAttrs = {"status": channelDict["Status"],
                           "files": channelDict["Files"],
                           "size": channelDict["Size"],
                           "successfulAttempts": channelBandwidth["SuccessfulFiles"],
                           "failedAttempts": channelBandwidth["FailedFiles"],
                           "distinctFailedFiles": failedFiles.get(channelID, 0),
                           "fileput": channelBandwidth["Fileput"],
                           "throughput": channelBandwidth["Throughput"]}
                roAttrs = {"channelID": channelID,
                           "channelName": channelDict["ChannelName"],
                           "acceptableFailureRate": self.acceptableFailureRate,
                           "acceptableFailedFiles": self.acceptableFailedFiles,
                           "schedulingType": self.schedulingType}
                graph.addEdge(FTSChannel(fromNode, toNode, rwAttrs, roAttrs))

        self.ftsGraph = graph
        self.lastRssUpdate = datetime.datetime.now()
        return S_OK()

    def updateGraph(self, rwAccess=False, replicationTree=None, size=0.0):
        """ update rw access for nodes (sites) and size and files for edges (channels)

        :param bool rwAccess: if True, refresh read/write flags of all SEs from RSS
        :param dict replicationTree: tree whose channels get :size: and one file added
        :param float size: file size added to every channel present in :replicationTree:
        :return: S_OK() or the S_ERROR from the RSS query
        """
        replicationTree = replicationTree if replicationTree else {}
        size = size if size else 0.0
        ## update nodes rw access for SEs
        if rwAccess:
            for lcgSite in self.ftsGraph.nodes():
                rwDict = self.__getRWAccessForSE(lcgSite.SEs.keys())
                if not rwDict["OK"]:
                    return rwDict
                lcgSite.SEs = rwDict["Value"]
        ## update channels size and files
        if replicationTree:
            for channel in self.ftsGraph.edges():
                if channel.channelID in replicationTree:
                    channel.size += size
                    channel.files += 1
        return S_OK()

    @staticmethod
    def _isUsable(channel, sourceSE, targetSE):
        """ tell if :channel: can be used right now to copy from :sourceSE: to :targetSE:

        A channel is usable when the source SE is readable, the target SE is writable,
        the channel status is "Active" and its timeToStart is finite.
        """
        return bool(channel.fromNode.SEs[sourceSE]["read"] and
                    channel.toNode.SEs[targetSE]["write"] and
                    channel.status == "Active" and
                    channel.timeToStart < float("inf"))

    @staticmethod
    def _rankCandidates(channels, primarySources, sigma, timeToSite=None):
        """ pick channels with the smallest (penalised) time to start

        :param list channels: [ ( channel, sourceSE, targetSE ), ... ]
        :param list primarySources: SEs holding a replica before scheduling started; hops
                                    from any other source are penalised with :sigma:
        :param float sigma: penalty added for non-primary sources
        :param dict timeToSite: optional { SE : time already needed to reach that SE }
        :return: tuple ( candidates, timeToStart, localFound ); a channel with identical
                 end nodes is returned immediately as the only candidate (localFound=True),
                 otherwise all channels tied at the minimal time are returned
        """
        timeToSite = timeToSite if timeToSite else {}
        minTimeToStart = float("inf")
        candidates = []
        for channel, sourceSE, targetSE in channels:
            timeToStart = channel.timeToStart
            if sourceSE not in primarySources:
                timeToStart += sigma
            if sourceSE in timeToSite:
                timeToStart += timeToSite[sourceSE]
            ## local channel wins regardless of its time to start
            if channel.fromNode == channel.toNode:
                return [(channel, sourceSE, targetSE)], timeToStart, True
            ## strict '<' so that equal times fall through and are collected as ties
            if timeToStart < minTimeToStart:
                minTimeToStart = timeToStart
                candidates = [(channel, sourceSE, targetSE)]
            elif timeToStart == minTimeToStart:
                candidates.append((channel, sourceSE, targetSE))
        return candidates, minTimeToStart, False

    @staticmethod
    def _findAncestor(tree, sourceSE):
        """ find channelID in :tree: whose destination SE is exactly :sourceSE:

        :return: channelID or False when :sourceSE: is not fed by any channel in the tree
        """
        ancestor = False
        for channelID, treeItem in tree.items():
            ## SE names are compared for equality, "CERN" must not match "CERN-USER"
            if treeItem["DestSE"] == sourceSE:
                ancestor = channelID
        return ancestor

    def _findChannels(self, sourceSEs, targetSEs, caller):
        """ collect all defined channels between :sourceSEs: and :targetSEs:

        Missing channels are reported with a warning prefixed by :caller: and skipped.

        :return: list of ( channel, sourceSE, targetSE )
        """
        channels = []
        for targetSE in targetSEs:
            for sourceSE in sourceSEs:
                ftsChannel = self.ftsGraph.findChannel(sourceSE, targetSE)
                if not ftsChannel["OK"]:
                    self.log.warn("%s: %s" % (caller, ftsChannel["Message"]))
                    continue
                channels.append((ftsChannel["Value"], sourceSE, targetSE))
        return channels

    def simple(self, sourceSEs, targetSEs):
        """ simple strategy - one source, many targets

        :param list sourceSEs: list with only one sourceSE name
        :param list targetSEs: list with target SE names
        :return: S_OK( tree ) or S_ERROR when there is not exactly one source, a channel
                 is missing, an SE is banned or a channel would be used twice
        """
        if len(sourceSEs) != 1:
            return S_ERROR("simple: wrong argument supplied for sourceSEs, only one sourceSE allowed")
        sourceSE = sourceSEs[0]
        tree = {}
        for targetSE in targetSEs:
            channel = self.ftsGraph.findChannel(sourceSE, targetSE)
            if not channel["OK"]:
                return S_ERROR(channel["Message"])
            channel = channel["Value"]
            if not channel.fromNode.SEs[sourceSE]["read"]:
                return S_ERROR("simple: sourceSE '%s' in banned for reading rigth now" % sourceSE)
            if not channel.toNode.SEs[targetSE]["write"]:
                return S_ERROR("simple: targetSE '%s' is banned for writing rigth now" % targetSE)
            if channel.channelID in tree:
                return S_ERROR("simple: unable to create replication tree, channel '%s' cannot be used twice" %
                               channel.channelName)
            tree[channel.channelID] = {"Ancestor": False, "SourceSE": sourceSE,
                                       "DestSE": targetSE, "Strategy": "Simple"}
        return S_OK(tree)

    def swarm(self, sourceSEs, targetSEs):
        """ swarm strategy - one target, many sources, pick up the fastest

        :param list sourceSEs: list of source SE
        :param list targetSEs: one element list with name of target SE
        :return: S_OK( tree ) with a single channel or S_ERROR
        """
        ## exactly one target is required, an empty list is rejected as well
        if len(targetSEs) != 1:
            return S_ERROR("swarm: wrong argument supplied for targetSEs, only one targetSE allowed")
        targetSE = targetSEs[0]
        ## find channels
        channels = []
        for sourceSE in sourceSEs:
            channel = self.ftsGraph.findChannel(sourceSE, targetSE)
            if not channel["OK"]:
                self.log.warn("swarm: %s" % channel["Message"])
                continue
            channels.append((sourceSE, channel["Value"]))
        ## exit - no channels
        if not channels:
            return S_ERROR("swarm: unable to find FTS channels between '%s' and '%s'" %
                           (",".join(sourceSEs), targetSE))
        ## filter out non active channels
        channels = [(sourceSE, channel) for sourceSE, channel in channels
                    if self._isUsable(channel, sourceSE, targetSE)]
        ## exit - no active channels
        if not channels:
            return S_ERROR("swarm: no active channels found between %s and %s" % (sourceSEs, targetSE))
        ## find min timeToStart, the first one wins on ties
        selSourceSE, selChannel = min(channels, key=lambda item: item[1].timeToStart)
        if not selSourceSE:
            return S_ERROR("swarm: no active channels found between %s and %s" % (sourceSEs, targetSE))
        tree = {selChannel.channelID: {"Ancestor": False, "SourceSE": selSourceSE,
                                       "DestSE": targetSE, "Strategy": "Swarm"}}
        return S_OK(tree)

    def minimiseTotalWait(self, sourceSEs, targetSEs):
        """ find dag that minimises start time

        Targets are served one by one; every served target becomes an additional source
        for the remaining ones. The supplied lists are not modified.

        :param list sourceSEs: list of available source SEs
        :param list targetSEs: list of target SEs
        :return: S_OK( tree ) or S_ERROR
        """
        tree = {}
        ## snapshot of the initial sources, needed to tell primary hops from secondary ones
        primarySources = list(sourceSEs)
        ## work on copies, the caller's lists stay untouched
        sourceSEs = list(sourceSEs)
        targetSEs = list(targetSEs)
        while targetSEs:
            channels = self._findChannels(sourceSEs, targetSEs, "minimiseTotalWait")
            if not channels:
                msg = "minimiseTotalWait: FTS channels between %s and %s not defined" % (",".join(sourceSEs),
                                                                                        ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)
            ## filter out already used channels
            channels = [item for item in channels if item[0].channelID not in tree]
            if not channels:
                msg = "minimiseTotalWait: all FTS channels between %s and %s are already used in tree" % (
                    ",".join(sourceSEs), ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)
            ## filter out non-active channels
            self.log.debug("minimiseTotalWait: found %s candiate channels, checking activity" % len(channels))
            channels = [item for item in channels if self._isUsable(*item)]
            if not channels:
                self.log.error("minimiseTotalWait: no active FTS channels found")
                return S_ERROR("minimiseTotalWait: no active FTS channels found")

            candidates, _timeToStart, localFound = self._rankCandidates(channels, primarySources, self.sigma)
            if localFound:
                self.log.debug("minimiseTotalWait: found local channel '%s'" % candidates[0][0].channelName)
            if not candidates:
                return S_ERROR("minimiseTotalWait: unable to find candidate FTS channels minimising total wait time")

            ## random pick among equally good channels
            random.shuffle(candidates)
            selChannel, selSourceSE, selTargetSE = candidates[0]
            tree[selChannel.channelID] = {"Ancestor": self._findAncestor(tree, selSourceSE),
                                          "SourceSE": selSourceSE,
                                          "DestSE": selTargetSE,
                                          "Strategy": "MinimiseTotalWait"}
            sourceSEs.append(selTargetSE)
            targetSEs.remove(selTargetSE)
        return S_OK(tree)

    def dynamicThroughput(self, sourceSEs, targetSEs):
        """ dynamic throughput - many sources, many targets - find dag that minimises
        the accumulated time to start along the tree

        Works like :minimiseTotalWait:, but the time needed to reach an intermediate SE
        is added to every hop starting from it. The supplied lists are not modified.

        :param list sourceSEs: list of available source SE names
        :param list targetSEs: list of target SE names
        :return: S_OK( tree ) or S_ERROR
        """
        tree = {}
        ## snapshot of the initial sources, needed to tell primary hops from secondary ones
        primarySources = list(sourceSEs)
        ## work on copies, the caller's lists stay untouched
        sourceSEs = list(sourceSEs)
        targetSEs = list(targetSEs)
        ## { SE : time to start of the hop which delivers the file to this SE }
        timeToSite = {}
        while targetSEs:
            channels = self._findChannels(sourceSEs, targetSEs, "dynamicThroughput")
            ## no candidate channels found
            if not channels:
                msg = "dynamicThroughput: FTS channels between %s and %s are not defined" % (",".join(sourceSEs),
                                                                                            ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)
            ## filter out already used channels
            channels = [item for item in channels if item[0].channelID not in tree]
            if not channels:
                msg = "dynamicThroughput: all FTS channels between %s and %s are already used in tree" % (
                    ",".join(sourceSEs), ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)
            ## filter out non-active channels
            self.log.debug("dynamicThroughput: found %s candidate channels, checking activity" % len(channels))
            channels = [item for item in channels if self._isUsable(*item)]
            if not channels:
                self.log.info("dynamicThroughput: active candidate channels not found")
                return S_ERROR("dynamicThroughput: no active candidate FTS channels")

            candidates, selTimeToStart, localFound = self._rankCandidates(channels, primarySources,
                                                                          self.sigma, timeToSite)
            if localFound:
                self.log.debug("dynamicThroughput: found local channel '%s'" % candidates[0][0].channelName)
            if not candidates:
                return S_ERROR("dynamicThroughput: unable to find candidate FTS channels")

            ## random pick among equally good channels
            random.shuffle(candidates)
            selChannel, selSourceSE, selTargetSE = candidates[0]
            tree[selChannel.channelID] = {"Ancestor": self._findAncestor(tree, selSourceSE),
                                          "SourceSE": selSourceSE,
                                          "DestSE": selTargetSE,
                                          "Strategy": "DynamicThroughput"}
            timeToSite[selTargetSE] = selTimeToStart
            sourceSEs.append(selTargetSE)
            targetSEs.remove(selTargetSE)
        return S_OK(tree)

    def reset(self):
        """ reset :chosenStrategy:

        :param self: self reference
        """
        self.chosenStrategy = 0

    def getSupportedStrategies(self):
        """ Get supported strategies.

        :param self: self reference
        """
        return self.supportedStrategies

    def replicationTree(self, sourceSEs, targetSEs, size, strategy=None):
        """ get replication tree

        :param list sourceSEs: list of sources SE names to use
        :param list targetSEs: list of target SE names to use
        :param long size: file size
        :param str strategy: strategy name, next active strategy is used when not given
        :return: S_OK( tree ) or S_ERROR
        """
        ## update SEs rwAccess every rwUpdatePeriod timedelta (default 600 s)
        now = datetime.datetime.now()
        if now - self.lastRssUpdate > self.rwUpdatePeriod:
            update = self.updateGraph(rwAccess=True)
            if not update["OK"]:
                ## not fatal: carry on with the previously known rw flags
                self.log.warn("replicationTree: unable to update FTS graph: %s" % update["Message"])
            else:
                self.lastRssUpdate = now
        ## get strategy
        strategy = strategy if strategy else self.__selectStrategy()
        if strategy not in self.getSupportedStrategies():
            return S_ERROR("replicationTree: unsupported strategy '%s'" % strategy)

        self.log.info("replicationTree: strategy=%s sourceSEs=%s targetSEs=%s size=%s" %
                      (strategy, sourceSEs, targetSEs, size))
        ## fire action from dispatcher
        tree = self.strategyDispatcher[strategy](sourceSEs, targetSEs)
        if not tree["OK"]:
            self.log.error("replicationTree: %s" % tree["Message"])
            return tree
        ## update graph edges
        update = self.updateGraph(replicationTree=tree["Value"], size=size)
        if not update["OK"]:
            self.log.error("replicationTree: unable to update FTS graph: %s" % update["Message"])
            return update
        return tree

    def __selectStrategy(self):
        """ If more than one active strategy use one after the other.

        :param self: self reference
        """
        chosenStrategy = self.activeStrategies[self.chosenStrategy]
        self.chosenStrategy += 1
        ## wrap around
        if self.chosenStrategy == self.numberOfStrategies:
            self.chosenStrategy = 0
        return chosenStrategy

    def __getRWAccessForSE(self, seList):
        """ get RSS R/W for :seList:

        An SE counts as readable/writable when its RSS status for the given access type
        is "Active" or "Degraded".

        :param list seList: SE list
        :return: S_OK( { SE : { "read" : bool, "write" : bool } } ) or S_ERROR from RSS
        """
        rwDict = dict((se, {"read": False, "write": False}) for se in seList)
        for flag, statusType in (("read", "ReadAccess"), ("write", "WriteAccess")):
            access = self.resourceStatus.getStorageStatus(seList, statusType=statusType)
            if not access["OK"]:
                return access
            allowed = set(se for se, status in access["Value"].items()
                          if statusType in status and status[statusType] in ("Active", "Degraded"))
            for se in rwDict:
                rwDict[se][flag] = se in allowed
        return S_OK(rwDict)
# Print the read/write access status of every storage element eligible for the
# VO of the current proxy group. Each failing step exits with its own code (1, 2, 3).
result = getVOfromProxyGroup()
if not result['OK']:
    gLogger.notice('Error:', result['Message'])
    DIRAC.exit(1)
vo = result['Value']

resources = Resources(vo=vo)
result = resources.getEligibleStorageElements()
if not result['OK']:
    gLogger.notice('Error:', result['Message'])
    DIRAC.exit(2)
seList = sortList(result['Value'])

resourceStatus = ResourceStatus()
result = resourceStatus.getStorageStatus(seList)
if not result['OK']:
    gLogger.notice('Error:', result['Message'])
    DIRAC.exit(3)

for k, v in result['Value'].items():
    # an access type missing from the status record is reported as 'Active'
    readState = v.get('ReadAccess', 'Active')
    writeState = v.get('WriteAccess', 'Active')
    gLogger.notice("%s %s %s" % (k.ljust(25), readState.rjust(15), writeState.rjust(15)))
res = Resources().getStorageElements( site ) if not res[ 'OK' ]: gLogger.error( 'The provided site (%s) is not known.' % site ) DIRAC.exit( -1 ) ses.extend( res[ 'Value' ] ) if not ses: gLogger.error( 'There were no SEs provided' ) DIRAC.exit() readAllowed = [] writeAllowed = [] checkAllowed = [] resourceStatus = ResourceStatus() res = resourceStatus.getStorageStatus( ses ) if not res[ 'OK' ]: gLogger.error( 'Storage Element %s does not exist' % ses ) DIRAC.exit( -1 ) reason = 'Forced with dirac-admin-allow-se by %s' % userName for se, seOptions in res[ 'Value' ].items(): resW = resC = resR = { 'OK' : False } # InActive is used on the CS model, Banned is the equivalent in RSS if read and seOptions.has_key( 'ReadAccess' ): if not seOptions[ 'ReadAccess' ] in [ "InActive", "Banned", "Probing", "Degraded" ]:
if not res[ 'OK' ]: gLogger.error( 'The provided site (%s) is not known.' % site ) DIRAC.exit( -1 ) ses.extend( res[ 'Value' ][ 'SE' ].replace( ' ', '' ).split( ',' ) ) if not ses: gLogger.error( 'There were no SEs provided' ) DIRAC.exit( -1 ) readBanned = [] writeBanned = [] checkBanned = [] resourceStatus = ResourceStatus() res = resourceStatus.getStorageStatus( ses ) if not res['OK']: gLogger.error( "Storage Element %s does not exist" % ses ) DIRAC.exit( -1 ) reason = 'Forced with dirac-admin-ban-se by %s' % userName for se, seOptions in res[ 'Value' ].items(): resW = resC = resR = { 'OK' : False } # Eventually, we will get rid of the notion of InActive, as we always write Banned. if read and seOptions.has_key( 'ReadAccess' ): if not seOptions[ 'ReadAccess' ] in [ 'Active', 'Degraded', 'Probing' ]: gLogger.notice( 'Read option for %s is %s, instead of %s' % ( se, seOptions[ 'ReadAccess' ], [ 'Active', 'Degraded', 'Probing' ] ) )
class StorageFactory:
    """Factory creating storage plug-in objects for DIRAC storage elements.

    Storage descriptions are read through the Resources helper; the plug-in class for a
    protocol is looked up as ``<extension>.Resources.Storage.<ProtocolName>Storage``.
    When ``useProxy`` is set, the ``ProxyStorage`` plug-in is used for every protocol.
    """

    def __init__(self, useProxy=False, vo=None):
        self.valid = True
        self.proxy = useProxy
        self.resourceStatus = ResourceStatus()
        self.resourcesHelper = Resources(vo=vo)

    ###########################################################################################
    #
    # Below are public methods for obtaining storage objects
    #

    def getStorageName(self, initialName):
        """Resolve ``initialName`` (possibly an alias) to the real storage name."""
        return self._getConfigStorageName(initialName)

    def getStorage(self, parameterDict):
        """This instantiates a single storage for the details provided and doesn't check the CS.

        :param dict parameterDict: must contain 'StorageName' and 'ProtocolName'; 'Protocol',
            'Port', 'Host', 'Path', 'SpaceToken' and 'WSUrl' are optional and default to ''
        :return: S_OK(storage object) or S_ERROR
        """
        # The storage name must be supplied.
        if "StorageName" not in parameterDict:
            errStr = "StorageFactory.getStorage: StorageName must be supplied"
            gLogger.error(errStr)
            return S_ERROR(errStr)
        storageName = parameterDict["StorageName"]

        # ProtocolName must be supplied otherwise nothing will work.
        if "ProtocolName" not in parameterDict:
            errStr = "StorageFactory.getStorage: ProtocolName must be supplied"
            gLogger.error(errStr)
            return S_ERROR(errStr)
        protocolName = parameterDict["ProtocolName"]

        # The other options need not always be specified
        protocol = parameterDict.get("Protocol", "")
        port = parameterDict.get("Port", "")
        host = parameterDict.get("Host", "")
        path = parameterDict.get("Path", "")
        spaceToken = parameterDict.get("SpaceToken", "")
        wsPath = parameterDict.get("WSUrl", "")

        return self.__generateStorageObject(
            storageName, protocolName, protocol, path, host, port, spaceToken, wsPath, parameterDict
        )

    def getStorages(self, storageName, protocolList=None):
        """Get an instance of a Storage based on the DIRAC SE name based on the CS entries CS

        'storageName' is the DIRAC SE name i.e. 'CERN-RAW'
        'protocolList' is an optional list of protocols if a sub-set is desired i.e ['SRM2','SRM1']

        :return: S_OK(dict) with keys 'StorageName', 'StorageOptions', 'StorageObjects',
            'LocalProtocols', 'RemoteProtocols', 'ProtocolOptions' and 'TurlProtocols',
            or S_ERROR when the storage is unknown or no plug-in could be instantiated
        """
        # Reset the state collected by a previous call
        self.remoteProtocols = []
        self.localProtocols = []
        self.name = ""
        self.options = {}
        self.protocolDetails = []
        self.storages = []

        # Get the name of the storage provided
        res = self._getConfigStorageName(storageName)
        if not res["OK"]:
            self.valid = False
            return res
        storageName = res["Value"]
        self.name = storageName

        # Get the options defined in the CS for this storage
        res = self._getConfigStorageOptions(storageName)
        if not res["OK"]:
            self.valid = False
            return res
        self.options = res["Value"]

        # Get the protocol specific details; this also fills local/remoteProtocols
        res = self._getConfigStorageProtocols(storageName)
        if not res["OK"]:
            self.valid = False
            return res
        self.protocolDetails = res["Value"]

        requestedLocalProtocols = []
        requestedRemoteProtocols = []
        requestedProtocolDetails = []
        turlProtocols = []

        # Generate the protocol specific plug-ins
        for protocolDict in self.protocolDetails:
            protocolName = protocolDict["ProtocolName"]
            # An empty/missing protocolList means "all protocols"
            if protocolList and protocolName not in protocolList:
                continue
            protocol = protocolDict["Protocol"]
            res = self.__generateStorageObject(
                storageName,
                protocolName,
                protocol,
                path=protocolDict["Path"],
                host=protocolDict["Host"],
                port=protocolDict["Port"],
                spaceToken=protocolDict["SpaceToken"],
                wsUrl=protocolDict["WSUrl"],
                parameters=protocolDict,
            )
            if not res["OK"]:
                # A failing plug-in is not fatal as long as another one can be created
                gLogger.info(res["Message"])
                continue
            self.storages.append(res["Value"])
            if protocolName in self.localProtocols:
                turlProtocols.append(protocol)
                requestedLocalProtocols.append(protocolName)
            if protocolName in self.remoteProtocols:
                requestedRemoteProtocols.append(protocolName)
            requestedProtocolDetails.append(protocolDict)

        if not self.storages:
            errStr = "StorageFactory.getStorages: Failed to instantiate any storage protocols."
            gLogger.error(errStr, self.name)
            return S_ERROR(errStr)

        resDict = {
            "StorageName": self.name,
            "StorageOptions": self.options,
            "StorageObjects": self.storages,
            "LocalProtocols": requestedLocalProtocols,
            "RemoteProtocols": requestedRemoteProtocols,
            "ProtocolOptions": requestedProtocolDetails,
            "TurlProtocols": turlProtocols,
        }
        return S_OK(resDict)

    ###########################################################################################
    #
    # Below are internal methods for obtaining section/option/value configuration
    #

    def _getConfigStorageName(self, storageName):
        """This gets the name of the storage from the configuration service.
        If the storage is an alias for another the resolution is performed.

        'storageName' is the storage section to check in the CS

        :return: S_OK(resolved name) or S_ERROR
        """
        result = self.resourcesHelper.getStorageElementOptionsDict(storageName)
        if not result["OK"]:
            errStr = "StorageFactory._getConfigStorageName: Failed to get storage options"
            gLogger.error(errStr, result["Message"])
            return S_ERROR(errStr)
        options = result["Value"]
        if not options:
            errStr = "StorageFactory._getConfigStorageName: Supplied storage doesn't exist."
            gLogger.error(errStr, storageName)
            return S_ERROR(errStr)
        if "Alias" not in options:
            return S_OK(storageName)

        # The alias target is taken from the options just read and resolved recursively
        aliasName = options["Alias"]
        result = self._getConfigStorageName(aliasName)
        if not result["OK"]:
            errStr = "StorageFactory._getConfigStorageName: Supplied storage doesn't exist."
            gLogger.error(errStr, aliasName)
            return S_ERROR(errStr)
        return S_OK(result["Value"])

    def _getConfigStorageOptions(self, storageName):
        """Get the options associated to the StorageElement as defined in the CS

        :return: S_OK(options dict) or S_ERROR
        """
        result = self.resourcesHelper.getStorageElementOptionsDict(storageName)
        if not result["OK"]:
            errStr = "StorageFactory._getStorageOptions: Failed to get storage options."
            gLogger.error(errStr, "%s: %s" % (storageName, result["Message"]))
            return S_ERROR(errStr)
        optionsDict = result["Value"]

        # The status is only checked for being retrievable; it is not merged into the options
        result = self.resourceStatus.getStorageStatus(storageName, "ReadAccess")
        if not result["OK"]:
            errStr = "StorageFactory._getStorageOptions: Failed to get storage status"
            gLogger.error(errStr, "%s: %s" % (storageName, result["Message"]))
            return S_ERROR(errStr)
        return S_OK(optionsDict)

    def _getConfigStorageProtocols(self, storageName):
        """Protocol specific information is present as sections in the Storage configuration

        :return: S_OK(list of protocol dicts) or S_ERROR
        """
        result = getSiteForResource(storageName)
        if not result["OK"]:
            return result
        site = result["Value"]

        result = self.resourcesHelper.getEligibleNodes("AccessProtocol", {"Site": site, "Resource": storageName})
        if not result["OK"]:
            return result

        protocolDetails = []
        for protocol in list(result["Value"]):
            result = self._getConfigStorageProtocolDetails(protocol)
            if not result["OK"]:
                return result
            protocolDetails.append(result["Value"])
        self.protocols = self.localProtocols + self.remoteProtocols
        return S_OK(protocolDetails)

    def _getConfigStorageProtocolDetails(self, protocol):
        """Parse the contents of the protocol block

        Appends the protocol name to self.localProtocols or self.remoteProtocols
        according to its 'Access' option.

        :return: S_OK(protocol dict) or S_ERROR when 'ProtocolName' is not defined
        """
        result = self.resourcesHelper.getAccessProtocolOptionsDict(protocol)
        if not result["OK"]:
            return result

        # We must have certain values internally even if not supplied in CS
        protocolDict = {
            "Access": "",
            "Host": "",
            "Path": "",
            "Port": "",
            "Protocol": "",
            "ProtocolName": "",
            "SpaceToken": "",
            "WSUrl": "",
        }
        protocolDict.update(result["Value"])

        # Now update the local and remote protocol lists.
        # A warning will be given if the Access option is not set.
        access = protocolDict["Access"]
        if access == "remote":
            self.remoteProtocols.append(protocolDict["ProtocolName"])
        elif access == "local":
            self.localProtocols.append(protocolDict["ProtocolName"])
        else:
            errStr = (
                "StorageFactory.__getProtocolDetails: The 'Access' option for %s is neither 'local' or 'remote'."
                % protocol
            )
            gLogger.warn(errStr)

        # The ProtocolName option must be defined
        if not protocolDict["ProtocolName"]:
            errStr = "StorageFactory.__getProtocolDetails: 'ProtocolName' option is not defined."
            gLogger.error(errStr, "%s" % protocol)
            return S_ERROR(errStr)
        return S_OK(protocolDict)

    ###########################################################################################
    #
    # Below is the method for obtaining the object instantiated for a provided storage configuration
    #

    @staticmethod
    def _normalisePath(path):
        """Strip trailing slashes from ``path``; a missing or empty path becomes '/'."""
        path = (path or "").rstrip("/")
        return path if path else "/"

    def __generateStorageObject(
        self,
        storageName,
        protocolName,
        protocol,
        path=None,
        host=None,
        port=None,
        spaceToken=None,
        wsUrl=None,
        parameters=None,
    ):
        """Import the plug-in module for the protocol and instantiate its storage class.

        The installed extensions are searched in the order returned by
        getInstalledExtensions(); the first one providing the plug-in file is used.

        :return: S_OK(storage object) or S_ERROR when the plug-in cannot be found,
            imported or instantiated, or when setting the extra parameters fails
        """
        storageType = "Proxy" if self.proxy else protocolName
        path = self._normalisePath(path)
        # This enforces the convention that the plug in must be named after the protocol
        moduleName = "%sStorage" % storageType

        for moduleRootPath in getInstalledExtensions():
            gLogger.verbose("Trying to load from root path %s" % moduleRootPath)
            moduleFile = os.path.join(rootPath, moduleRootPath, "Resources", "Storage", "%s.py" % moduleName)
            gLogger.verbose("Looking for file %s" % moduleFile)
            if not os.path.isfile(moduleFile):
                continue

            try:
                storageModule = __import__(
                    "%s.Resources.Storage.%s" % (moduleRootPath, moduleName), globals(), locals(), [moduleName]
                )
            except Exception as x:
                errStr = "StorageFactory._generateStorageObject: Failed to import %s: %s" % (storageName, x)
                gLogger.exception(errStr)
                return S_ERROR(errStr)

            try:
                # Plain attribute lookup instead of eval() on a built string
                storageClass = getattr(storageModule, moduleName)
                storage = storageClass(storageName, protocol, path, host, port, spaceToken, wsUrl)
                if not storage.isOK():
                    errStr = "StorageFactory._generateStorageObject: Failed to instantiate storage plug in."
                    gLogger.error(errStr, "%s" % (moduleName))
                    return S_ERROR(errStr)
            except Exception as x:
                errStr = "StorageFactory._generateStorageObject: Failed to instantiate %s(): %s" % (moduleName, x)
                gLogger.exception(errStr)
                return S_ERROR(errStr)

            # Set extra parameters if any
            if parameters:
                result = storage.setParameters(parameters)
                if not result["OK"]:
                    return result

            # If use proxy, keep the original protocol name
            if self.proxy:
                storage.protocolName = protocolName
            return S_OK(storage)

        # No installed extension provides the plug-in file
        errStr = "StorageFactory._generateStorageObject: Failed to find storage plug in."
        gLogger.error(errStr, "%s" % (moduleName))
        return S_ERROR(errStr)
class StorageFactory:
    """Factory that instantiates storage plug-in objects for storage elements.

    A storage description is either handed in by the caller
    (:meth:`getStorage`) or looked up through the ``Resources`` helper
    (:meth:`getStorages`).  Every method reports its outcome as an
    ``S_OK`` / ``S_ERROR`` structure.
    """

    def __init__(self, useProxy=False, vo=None):
        """c'tor

        :param bool useProxy: when true the ``Proxy`` plug-in is loaded instead
                              of the protocol specific one
        :param vo: forwarded to the ``Resources`` helper
        """
        self.valid = True
        self.proxy = useProxy
        self.resourceStatus = ResourceStatus()
        self.resourcesHelper = Resources(vo=vo)
        # State that getStorages() (re)populates; defined here so the helper
        # methods can also be called on a fresh instance.
        self.name = ''
        self.options = {}
        self.protocolDetails = []
        self.storages = []
        self.remoteProtocols = []
        self.localProtocols = []
        self.protocols = []

    ###########################################################################
    #
    # Below are public methods for obtaining storage objects
    #

    def getStorageName(self, initialName):
        """Resolve `initialName` (following aliases) to the real storage name."""
        return self._getConfigStorageName(initialName)

    def getStorage(self, parameterDict):
        """Instantiate a single storage from the supplied details only.

        The configuration is not consulted.

        :param dict parameterDict: must hold 'StorageName' and 'ProtocolName';
            'Protocol', 'Path', 'Host', 'Port', 'SpaceToken' and 'WSUrl' are
            optional and default to ''.  The whole dict is also handed to the
            plug-in as extra parameters.
        :return: S_OK( storage object ) or S_ERROR
        """
        # Both names are mandatory, nothing works without them.
        for mandatory in ('StorageName', 'ProtocolName'):
            if mandatory not in parameterDict:
                errStr = "StorageFactory.getStorage: %s must be supplied" % mandatory
                gLogger.error(errStr)
                return S_ERROR(errStr)

        # The other options need not always be specified.
        return self.__generateStorageObject(
            parameterDict['StorageName'],
            parameterDict['ProtocolName'],
            parameterDict.get('Protocol', ''),
            path=parameterDict.get('Path', ''),
            host=parameterDict.get('Host', ''),
            port=parameterDict.get('Port', ''),
            spaceToken=parameterDict.get('SpaceToken', ''),
            wsUrl=parameterDict.get('WSUrl', ''),
            parameters=parameterDict)

    def getStorages(self, storageName, protocolList=None):
        """Instantiate the storage plug-ins configured for a storage element.

        :param str storageName: DIRAC SE name, i.e. 'CERN-RAW'
        :param list protocolList: optional sub-set of protocol names to build,
            i.e. ['SRM2', 'SRM1']; empty or None means all of them
        :return: S_OK( dict ) with keys 'StorageName', 'StorageOptions',
            'StorageObjects', 'LocalProtocols', 'RemoteProtocols',
            'ProtocolOptions' and 'TurlProtocols', or S_ERROR when the
            configuration cannot be read or no plug-in could be instantiated
        """
        self.remoteProtocols = []
        self.localProtocols = []
        self.name = ''
        self.options = {}
        self.protocolDetails = []
        self.storages = []

        # Get the name of the storage provided
        res = self._getConfigStorageName(storageName)
        if not res['OK']:
            self.valid = False
            return res
        storageName = res['Value']
        self.name = storageName

        # Get the options defined in the CS for this storage
        res = self._getConfigStorageOptions(storageName)
        if not res['OK']:
            self.valid = False
            return res
        self.options = res['Value']

        # Get the protocol specific details
        res = self._getConfigStorageProtocols(storageName)
        if not res['OK']:
            self.valid = False
            return res
        self.protocolDetails = res['Value']

        requestedLocalProtocols = []
        requestedRemoteProtocols = []
        requestedProtocolDetails = []
        turlProtocols = []

        # Generate the protocol specific plug-ins
        for protocolDict in self.protocolDetails:
            protocolName = protocolDict['ProtocolName']
            if protocolList and protocolName not in protocolList:
                continue
            protocol = protocolDict['Protocol']
            res = self.__generateStorageObject(storageName,
                                               protocolName,
                                               protocol,
                                               path=protocolDict['Path'],
                                               host=protocolDict['Host'],
                                               port=protocolDict['Port'],
                                               spaceToken=protocolDict['SpaceToken'],
                                               wsUrl=protocolDict['WSUrl'],
                                               parameters=protocolDict)
            if not res['OK']:
                # A failing plug-in is not fatal as long as another one works.
                gLogger.info(res['Message'])
                continue
            self.storages.append(res['Value'])
            if protocolName in self.localProtocols:
                turlProtocols.append(protocol)
                requestedLocalProtocols.append(protocolName)
            if protocolName in self.remoteProtocols:
                requestedRemoteProtocols.append(protocolName)
            requestedProtocolDetails.append(protocolDict)

        if not self.storages:
            errStr = "StorageFactory.getStorages: Failed to instantiate any storage protocols."
            gLogger.error(errStr, self.name)
            return S_ERROR(errStr)

        return S_OK({'StorageName': self.name,
                     'StorageOptions': self.options,
                     'StorageObjects': self.storages,
                     'LocalProtocols': requestedLocalProtocols,
                     'RemoteProtocols': requestedRemoteProtocols,
                     'ProtocolOptions': requestedProtocolDetails,
                     'TurlProtocols': turlProtocols})

    ###########################################################################
    #
    # Below are internal methods for obtaining section/option/value configuration
    #

    def _getConfigStorageName(self, storageName):
        """Get the real name of a storage, resolving aliases recursively.

        :param str storageName: the storage to look up
        :return: S_OK( resolved name ) or S_ERROR
        """
        result = self.resourcesHelper.getStorageElementOptionsDict(storageName)
        if not result['OK']:
            errStr = "StorageFactory._getConfigStorageName: Failed to get storage options"
            gLogger.error(errStr, result['Message'])
            return S_ERROR(errStr)
        options = result['Value']
        if not options:
            errStr = "StorageFactory._getConfigStorageName: Supplied storage doesn't exist."
            gLogger.error(errStr, storageName)
            return S_ERROR(errStr)
        if 'Alias' not in options:
            return S_OK(storageName)

        # The alias target is read from the options that were just fetched:
        # this class never defines the `rootConfigPath` the previous
        # gConfig-based lookup relied on.
        aliasName = options['Alias']
        result = self._getConfigStorageName(aliasName)
        if not result['OK']:
            errStr = "StorageFactory._getConfigStorageName: Supplied storage doesn't exist."
            gLogger.error(errStr, aliasName)
            return S_ERROR(errStr)
        return S_OK(result['Value'])

    def _getConfigStorageOptions(self, storageName):
        """Get the options associated to the StorageElement as defined in the CS.

        The 'ReadAccess' status is requested as well, but only to fail early
        when it cannot be obtained; it is not merged into the returned options.

        :return: S_OK( options dict ) or S_ERROR
        """
        result = self.resourcesHelper.getStorageElementOptionsDict(storageName)
        if not result['OK']:
            errStr = "StorageFactory._getStorageOptions: Failed to get storage options."
            gLogger.error(errStr, "%s: %s" % (storageName, result['Message']))
            return S_ERROR(errStr)
        optionsDict = result['Value']

        result = self.resourceStatus.getStorageStatus(storageName, 'ReadAccess')
        if not result['OK']:
            errStr = "StorageFactory._getStorageOptions: Failed to get storage status"
            gLogger.error(errStr, "%s: %s" % (storageName, result['Message']))
            return S_ERROR(errStr)
        return S_OK(optionsDict)

    def _getConfigStorageProtocols(self, storageName):
        """Collect the details of every access protocol of a storage element.

        Also refreshes `self.protocols` from the local and remote protocol
        lists filled while parsing the individual protocols.

        :return: S_OK( list of protocol dicts ) or the first failing result
        """
        result = getSiteForResource(storageName)
        if not result['OK']:
            return result
        site = result['Value']

        result = self.resourcesHelper.getEligibleNodes('AccessProtocol',
                                                       {'Site': site, 'Resource': storageName})
        if not result['OK']:
            return result

        protocolDetails = []
        for protocol in result['Value']:
            details = self._getConfigStorageProtocolDetails(protocol)
            if not details['OK']:
                return details
            protocolDetails.append(details['Value'])
        self.protocols = self.localProtocols + self.remoteProtocols
        return S_OK(protocolDetails)

    def _getConfigStorageProtocolDetails(self, protocol):
        """Parse the contents of one protocol block.

        :return: S_OK( protocol dict ) where 'Access', 'Host', 'Path', 'Port',
            'Protocol', 'ProtocolName', 'SpaceToken' and 'WSUrl' are always
            present, or S_ERROR when 'ProtocolName' is not defined
        """
        result = self.resourcesHelper.getAccessProtocolOptionsDict(protocol)
        if not result['OK']:
            return result

        # We must have certain values internally even if not supplied in CS
        protocolDict = {'Access': '', 'Host': '', 'Path': '', 'Port': '',
                        'Protocol': '', 'ProtocolName': '', 'SpaceToken': '', 'WSUrl': ''}
        protocolDict.update(result['Value'])

        # The ProtocolName option must be defined.  Checked before touching the
        # protocol lists so that an empty name is never registered.
        if not protocolDict['ProtocolName']:
            errStr = "StorageFactory.__getProtocolDetails: 'ProtocolName' option is not defined."
            gLogger.error(errStr, "%s" % protocol)
            return S_ERROR(errStr)

        # Now update the local and remote protocol lists.
        # A warning will be given if the Access option is not set.
        if protocolDict['Access'] == 'remote':
            self.remoteProtocols.append(protocolDict['ProtocolName'])
        elif protocolDict['Access'] == 'local':
            self.localProtocols.append(protocolDict['ProtocolName'])
        else:
            errStr = "StorageFactory.__getProtocolDetails: The 'Access' option for %s is neither 'local' or 'remote'." % protocol
            gLogger.warn(errStr)
        return S_OK(protocolDict)

    ###########################################################################
    #
    # Below is the method for obtaining the object instantiated for a provided storage configuration
    #

    @staticmethod
    def _normalisePath(path):
        """Strip trailing slashes from `path`; None or an empty result becomes '/'."""
        return (path or '').rstrip('/') or '/'

    def __generateStorageObject(self, storageName, protocolName, protocol, path=None,
                                host=None, port=None, spaceToken=None, wsUrl=None,
                                parameters=None):
        """Load and instantiate the plug-in class for one storage protocol.

        The plug-in module ``<Type>Storage`` is searched under
        ``Resources/Storage`` of each installed extension in turn and the first
        one whose file exists is used.  ``<Type>`` is `protocolName`, or
        'Proxy' when the factory was created with useProxy.

        :param dict parameters: extra parameters passed to the plug-in's
            setParameters() when not empty
        :return: S_OK( storage object ) or S_ERROR when no plug-in file is
            found, or importing / instantiating / configuring it fails
        """
        storageType = protocolName
        if self.proxy:
            storageType = 'Proxy'
        # This enforces the convention that the plug in must be named after the protocol
        moduleName = "%sStorage" % storageType
        path = self._normalisePath(path)

        for moduleRootPath in getInstalledExtensions():
            gLogger.verbose("Trying to load from root path %s" % moduleRootPath)
            moduleFile = os.path.join(rootPath, moduleRootPath, "Resources", "Storage",
                                      "%sStorage.py" % storageType)
            gLogger.verbose("Looking for file %s" % moduleFile)
            if not os.path.isfile(moduleFile):
                continue

            try:
                storageModule = __import__('%s.Resources.Storage.%s' % (moduleRootPath, moduleName),
                                           globals(), locals(), [moduleName])
            except Exception as x:
                errStr = "StorageFactory._generateStorageObject: Failed to import %s: %s" % (storageName, x)
                gLogger.exception(errStr)
                return S_ERROR(errStr)

            try:
                # Plain attribute lookup replaces the former eval() of a built string.
                storageClass = getattr(storageModule, moduleName)
                storage = storageClass(storageName, protocol, path, host, port, spaceToken, wsUrl)
                if not storage.isOK():
                    errStr = "StorageFactory._generateStorageObject: Failed to instantiate storage plug in."
                    gLogger.error(errStr, "%s" % (moduleName))
                    return S_ERROR(errStr)
            except Exception as x:
                errStr = "StorageFactory._generateStorageObject: Failed to instantiate %s(): %s" % (moduleName, x)
                gLogger.exception(errStr)
                return S_ERROR(errStr)

            # Set extra parameters if any
            if parameters:
                result = storage.setParameters(parameters)
                if not result['OK']:
                    return result

            # If use proxy, keep the original protocol name
            if self.proxy:
                storage.protocolName = protocolName
            return S_OK(storage)

        # No installed extension ships the requested plug-in.
        errStr = "StorageFactory._generateStorageObject: Failed to find storage plug in."
        gLogger.error(errStr, moduleName)
        return S_ERROR(errStr)
class StrategyHandler(object):
    """
    .. class:: StrategyHandler

    StrategyHandler is a helper class for determining optimal replication tree for given
    source files, their replicas and target storage elements.
    """

    def __init__(self, configSection, channels=None, bandwidths=None, failedFiles=None):
        """c'tor

        :param self: self reference
        :param str configSection: path on CS to ReplicationScheduler agent
        :param dict channels: active channels, keyed by channel ID
        :param dict bandwidths: observed throughput on active channels, keyed by channel ID
        :param dict failedFiles: number of distinct failed files per channel ID
        :raises SHGraphCreationError: when the FTS graph cannot be built
        """
        ## save config section
        self.configSection = configSection + "/" + self.__class__.__name__
        ## sublogger
        self.log = gLogger.getSubLogger("StrategyHandler", child=True)
        self.log.setLevel(gConfig.getValue(self.configSection + "/LogLevel", "DEBUG"))

        self.supportedStrategies = ['Simple', 'DynamicThroughput', 'Swarm', 'MinimiseTotalWait']
        self.log.info("Supported strategies = %s" % ", ".join(self.supportedStrategies))

        self.sigma = gConfig.getValue(self.configSection + '/HopSigma', 0.0)
        self.log.info("HopSigma = %s" % self.sigma)
        self.schedulingType = gConfig.getValue(self.configSection + '/SchedulingType', 'File')
        self.log.info("SchedulingType = %s" % self.schedulingType)
        self.activeStrategies = gConfig.getValue(self.configSection + '/ActiveStrategies',
                                                 ['MinimiseTotalWait'])
        self.log.info("ActiveStrategies = %s" % ", ".join(self.activeStrategies))
        self.numberOfStrategies = len(self.activeStrategies)
        self.log.info("Number of active strategies = %s" % self.numberOfStrategies)
        self.acceptableFailureRate = gConfig.getValue(self.configSection + '/AcceptableFailureRate', 75)
        self.log.info("AcceptableFailureRate = %s" % self.acceptableFailureRate)
        self.acceptableFailedFiles = gConfig.getValue(self.configSection + "/AcceptableFailedFiles", 5)
        self.log.info("AcceptableFailedFiles = %s" % self.acceptableFailedFiles)
        self.rwUpdatePeriod = gConfig.getValue(self.configSection + "/RssRWUpdatePeriod", 600)
        self.log.info("RSSUpdatePeriod = %s s" % self.rwUpdatePeriod)
        self.rwUpdatePeriod = datetime.timedelta(seconds=self.rwUpdatePeriod)

        ## bandwidths
        self.bandwidths = bandwidths if bandwidths else {}
        ## channels
        self.channels = channels if channels else {}
        ## distinct failed files per channel
        self.failedFiles = failedFiles if failedFiles else {}
        ## index of the next entry of activeStrategies to use
        self.chosenStrategy = 0
        ## fts graph
        self.ftsGraph = None
        ## timestamp for last update
        self.lastRssUpdate = datetime.datetime.now()
        ## dispatcher
        self.strategyDispatcher = {"MinimiseTotalWait": self.minimiseTotalWait,
                                   "DynamicThroughput": self.dynamicThroughput,
                                   "Simple": self.simple,
                                   "Swarm": self.swarm}
        ## own RSS client
        self.resourceStatus = ResourceStatus()
        ## create fts graph
        ftsGraph = self.setup(self.channels, self.bandwidths, self.failedFiles)
        if not ftsGraph["OK"]:
            raise SHGraphCreationError(ftsGraph["Message"])
        self.log.info("%s has been constructed" % self.__class__.__name__)

    def setup(self, channels, bandwithds, failedFiles):
        """ prepare fts graph

        :param dict channels: { channelID : { "Files" : long, "Size" : long, "ChannelName" : str,
                                              "Source" : str, "Destination" : str, "Status" : str } }
        :param dict bandwithds: { channelID : { "Throughput" : float, "Fileput" : float,
                                                "SuccessfulFiles" : long, "FailedFiles" : long } }
        :param dict failedFiles: { channelID : int }
        :return: S_OK() or the failing result of a lookup
        """
        graph = FTSGraph("sites")

        result = getStorageElementSiteMapping()
        if not result['OK']:
            return result

        ## create nodes, one per site
        for site, ses in result['Value'].items():
            rwDict = self.__getRWAccessForSE(ses)
            if not rwDict["OK"]:
                return rwDict
            # for a dotted site name the second component is used as node name
            siteName = site.split('.')[1] if '.' in site else site
            graph.addNode(LCGSite(siteName, {"SEs": rwDict["Value"]}))

        ## create edges, one per channel whose both ends are known nodes
        for channelID, channelDict in channels.items():
            fromNode = graph.getNode(channelDict["Source"])
            toNode = graph.getNode(channelDict["Destination"])
            if not (fromNode and toNode):
                continue
            bandwidth = bandwithds[channelID]
            rwAttrs = {"status": channelDict["Status"],
                       "files": channelDict["Files"],
                       "size": channelDict["Size"],
                       "successfulAttempts": bandwidth["SuccessfulFiles"],
                       "failedAttempts": bandwidth["FailedFiles"],
                       "distinctFailedFiles": failedFiles.get(channelID, 0),
                       "fileput": bandwidth["Fileput"],
                       "throughput": bandwidth["Throughput"]}
            roAttrs = {"channelID": channelID,
                       "channelName": channelDict["ChannelName"],
                       "acceptableFailureRate": self.acceptableFailureRate,
                       "acceptableFailedFiles": self.acceptableFailedFiles,
                       "schedulingType": self.schedulingType}
            graph.addEdge(FTSChannel(fromNode, toNode, rwAttrs, roAttrs))

        self.ftsGraph = graph
        self.lastRssUpdate = datetime.datetime.now()
        return S_OK()

    def updateGraph(self, rwAccess=False, replicationTree=None, size=0.0):
        """ update rw access for nodes (sites) and size and files for edges (channels)

        :param bool rwAccess: refresh the read/write flags of every SE of every node
        :param dict replicationTree: tree keyed by channel ID; every channel in it gets
                                     `size` added and its file counter incremented
        :param float size: file size
        :return: S_OK() or the failing result of the R/W lookup
        """
        replicationTree = replicationTree if replicationTree else {}
        size = size if size else 0.0
        ## update nodes rw access for SEs
        if rwAccess:
            for lcgSite in self.ftsGraph.nodes():
                rwDict = self.__getRWAccessForSE(lcgSite.SEs.keys())
                if not rwDict["OK"]:
                    return rwDict
                lcgSite.SEs = rwDict["Value"]
        ## update channels size and files
        if replicationTree:
            for channel in self.ftsGraph.edges():
                if channel.channelID in replicationTree:
                    channel.size += size
                    channel.files += 1
        return S_OK()

    def simple(self, sourceSEs, targetSEs):
        """ simple strategy - one source, many targets

        :param list sourceSEs: list with only one sourceSE name
        :param list targetSEs: list with target SE names
        :return: S_OK( tree ) or S_ERROR
        """
        if len(sourceSEs) != 1:
            return S_ERROR("simple: wrong argument supplied for sourceSEs, only one sourceSE allowed")
        sourceSE = sourceSEs[0]
        tree = {}
        for targetSE in targetSEs:
            channel = self.ftsGraph.findChannel(sourceSE, targetSE)
            if not channel["OK"]:
                return S_ERROR(channel["Message"])
            channel = channel["Value"]
            if not channel.fromNode.SEs[sourceSE]["read"]:
                return S_ERROR("simple: sourceSE '%s' in banned for reading rigth now" % sourceSE)
            if not channel.toNode.SEs[targetSE]["write"]:
                return S_ERROR("simple: targetSE '%s' is banned for writing rigth now" % targetSE)
            if channel.channelID in tree:
                return S_ERROR("simple: unable to create replication tree, channel '%s' cannot be used twice" %
                               channel.channelName)
            tree[channel.channelID] = {"Ancestor": False,
                                       "SourceSE": sourceSE,
                                       "DestSE": targetSE,
                                       "Strategy": "Simple"}
        return S_OK(tree)

    def swarm(self, sourceSEs, targetSEs):
        """ swarm strategy - one target, many sources, pick up the fastest

        :param list sourceSEs: list of source SE
        :param list targetSEs: one element list with name of target SE
        :return: S_OK( tree ) or S_ERROR
        """
        # exactly one target is required; an empty list used to raise IndexError
        if len(targetSEs) != 1:
            return S_ERROR("swarm: wrong argument supplied for targetSEs, only one targetSE allowed")
        targetSE = targetSEs[0]

        ## find channels
        channels = []
        for sourceSE in sourceSEs:
            channel = self.ftsGraph.findChannel(sourceSE, targetSE)
            if not channel["OK"]:
                self.log.warn("swarm: %s" % channel["Message"])
                continue
            channels.append((sourceSE, channel["Value"]))
        ## exit - no channels
        if not channels:
            return S_ERROR("swarm: unable to find FTS channels between '%s' and '%s'" %
                           (",".join(sourceSEs), targetSE))

        ## filter out non active channels
        channels = [(sourceSE, channel) for sourceSE, channel in channels
                    if channel.fromNode.SEs[sourceSE]["read"]
                    and channel.toNode.SEs[targetSE]["write"]
                    and channel.status == "Active"
                    and channel.timeToStart < float("inf")]
        ## exit - no active channels
        if not channels:
            return S_ERROR("swarm: no active channels found between %s and %s" % (sourceSEs, targetSE))

        ## min timeToStart wins, the first one on a tie
        selSourceSE, selChannel = min(channels, key=lambda item: item[1].timeToStart)
        tree = {selChannel.channelID: {"Ancestor": False,
                                       "SourceSE": selSourceSE,
                                       "DestSE": targetSE,
                                       "Strategy": "Swarm"}}
        return S_OK(tree)

    @staticmethod
    def _fastestCandidates(timedCandidates):
        """ pick every candidate tied for the smallest start time

        :param list timedCandidates: [ ( timeToStart, candidate ), ... ]
        :return: tuple ( smallest timeToStart, [ candidate, ... ] ), candidates in input order
        """
        minTimeToStart = float("inf")
        fastest = []
        for timeToStart, candidate in timedCandidates:
            if timeToStart < minTimeToStart:
                minTimeToStart = timeToStart
                fastest = [candidate]
            elif timeToStart == minTimeToStart:
                fastest.append(candidate)
        return minTimeToStart, fastest

    @staticmethod
    def _findAncestor(tree, sourceSE):
        """ find the channel of `tree` that delivers to `sourceSE`

        :param dict tree: replication tree built so far
        :param str sourceSE: SE name used as source of the next hop
        :return: channel ID whose "DestSE" equals `sourceSE`, False if there is none
        """
        ancestor = False
        for channelID, treeItem in tree.items():
            # exact comparison: "DestSE" is a single SE name, not a collection
            if treeItem["DestSE"] == sourceSE:
                ancestor = channelID
        return ancestor

    def minimiseTotalWait(self, sourceSEs, targetSEs):
        """ find dag that minimises start time

        The caller's lists are left untouched. HopSigma is added to the start
        time of every hop that does not begin at one of the original sources.

        :param list sourceSEs: list of available source SEs
        :param list targetSEs: list of target SEs
        :return: S_OK( tree ) or S_ERROR
        """
        tree = {}
        # snapshot of the original sources; the working copy below grows with
        # every target that has been scheduled
        primarySources = list(sourceSEs)
        sourceSEs = list(sourceSEs)
        targetSEs = list(targetSEs)
        while targetSEs:
            channels = []
            for targetSE in targetSEs:
                for sourceSE in sourceSEs:
                    ftsChannel = self.ftsGraph.findChannel(sourceSE, targetSE)
                    if not ftsChannel["OK"]:
                        self.log.warn("minimiseTotalWait: %s" % ftsChannel["Message"])
                        continue
                    channels.append((ftsChannel["Value"], sourceSE, targetSE))
            if not channels:
                msg = "minimiseTotalWait: FTS channels between %s and %s not defined" % (
                    ",".join(sourceSEs), ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)

            ## filter out already used channels
            channels = [item for item in channels if item[0].channelID not in tree]
            if not channels:
                msg = "minimiseTotalWait: all FTS channels between %s and %s are already used in tree" % (
                    ",".join(sourceSEs), ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)

            self.log.debug("minimiseTotalWait: found %s candiate channels, checking activity" % len(channels))
            channels = [(channel, sourceSE, targetSE) for channel, sourceSE, targetSE in channels
                        if channel.fromNode.SEs[sourceSE]["read"]
                        and channel.toNode.SEs[targetSE]["write"]
                        and channel.status == "Active"
                        and channel.timeToStart < float("inf")]
            if not channels:
                self.log.error("minimiseTotalWait: no active FTS channels found")
                return S_ERROR("minimiseTotalWait: no active FTS channels found")

            candidates = []
            timedCandidates = []
            for channel, sourceSE, targetSE in channels:
                timeToStart = channel.timeToStart
                if sourceSE not in primarySources:
                    timeToStart += self.sigma
                ## local found - a channel inside one site beats everything else
                if channel.fromNode == channel.toNode:
                    self.log.debug("minimiseTotalWait: found local channel '%s'" % channel.channelName)
                    candidates = [(channel, sourceSE, targetSE)]
                    break
                timedCandidates.append((timeToStart, (channel, sourceSE, targetSE)))
            if not candidates:
                candidates = self._fastestCandidates(timedCandidates)[1]
            if not candidates:
                return S_ERROR("minimiseTotalWait: unable to find candidate FTS channels minimising total wait time")

            ## ties are broken at random
            selChannel, selSourceSE, selTargetSE = random.choice(candidates)
            tree[selChannel.channelID] = {"Ancestor": self._findAncestor(tree, selSourceSE),
                                          "SourceSE": selSourceSE,
                                          "DestSE": selTargetSE,
                                          "Strategy": "MinimiseTotalWait"}
            sourceSEs.append(selTargetSE)
            targetSEs.remove(selTargetSE)
        return S_OK(tree)

    def dynamicThroughput(self, sourceSEs, targetSEs):
        """ dynamic throughput - many sources, many targets

        Same search as minimiseTotalWait, but the start time of a hop also
        includes the start time recorded for reaching its source when that
        source was itself a target earlier in the tree. The caller's lists
        are left untouched.

        :param list sourceSEs: list of available source SE names
        :param list targetSEs: list of target SE names
        :return: S_OK( tree ) or S_ERROR
        """
        tree = {}
        # snapshot of the original sources; the working copy below grows with
        # every target that has been scheduled
        primarySources = list(sourceSEs)
        sourceSEs = list(sourceSEs)
        targetSEs = list(targetSEs)
        timeToSite = {}
        while targetSEs:
            channels = []
            for targetSE in targetSEs:
                for sourceSE in sourceSEs:
                    ftsChannel = self.ftsGraph.findChannel(sourceSE, targetSE)
                    if not ftsChannel["OK"]:
                        self.log.warn("dynamicThroughput: %s" % ftsChannel["Message"])
                        continue
                    channels.append((ftsChannel["Value"], sourceSE, targetSE))
            ## no candidate channels found
            if not channels:
                msg = "dynamicThroughput: FTS channels between %s and %s are not defined" % (
                    ",".join(sourceSEs), ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)

            ## filter out already used channels
            channels = [item for item in channels if item[0].channelID not in tree]
            if not channels:
                msg = "dynamicThroughput: all FTS channels between %s and %s are already used in tree" % (
                    ",".join(sourceSEs), ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)

            ## filter out non-active channels
            self.log.debug("dynamicThroughput: found %s candidate channels, checking activity" % len(channels))
            channels = [(channel, sourceSE, targetSE) for channel, sourceSE, targetSE in channels
                        if channel.fromNode.SEs[sourceSE]["read"]
                        and channel.toNode.SEs[targetSE]["write"]
                        and channel.status == "Active"
                        and channel.timeToStart < float("inf")]
            if not channels:
                self.log.info("dynamicThroughput: active candidate channels not found")
                return S_ERROR("dynamicThroughput: no active candidate FTS channels")

            candidates = []
            selTimeToStart = None
            timedCandidates = []
            for channel, sourceSE, targetSE in channels:
                timeToStart = channel.timeToStart
                if sourceSE not in primarySources:
                    timeToStart += self.sigma
                if sourceSE in timeToSite:
                    timeToStart += timeToSite[sourceSE]
                ## local found - a channel inside one site beats everything else
                if channel.fromNode == channel.toNode:
                    self.log.debug("dynamicThroughput: found local channel '%s'" % channel.channelName)
                    candidates = [(channel, sourceSE, targetSE)]
                    selTimeToStart = timeToStart
                    break
                timedCandidates.append((timeToStart, (channel, sourceSE, targetSE)))
            if not candidates:
                selTimeToStart, candidates = self._fastestCandidates(timedCandidates)
            if not candidates:
                return S_ERROR("dynamicThroughput: unable to find candidate FTS channels")

            ## ties are broken at random
            selChannel, selSourceSE, selTargetSE = random.choice(candidates)
            tree[selChannel.channelID] = {"Ancestor": self._findAncestor(tree, selSourceSE),
                                          "SourceSE": selSourceSE,
                                          "DestSE": selTargetSE,
                                          "Strategy": "DynamicThroughput"}
            timeToSite[selTargetSE] = selTimeToStart
            sourceSEs.append(selTargetSE)
            targetSEs.remove(selTargetSE)
        return S_OK(tree)

    def reset(self):
        """ reset :chosenStrategy:

        :param self: self reference
        """
        self.chosenStrategy = 0

    def getSupportedStrategies(self):
        """ Get supported strategies.

        :param self: self reference
        """
        return self.supportedStrategies

    def replicationTree(self, sourceSEs, targetSEs, size, strategy=None):
        """ get replication tree

        :param list sourceSEs: list of sources SE names to use
        :param list targetSEs: list of target SE names to use
        :param long size: file size
        :param str strategy: strategy name, next active strategy is used when not given
        :return: S_OK( tree ) or S_ERROR
        """
        ## update SEs rwAccess every rwUpdatePeriod timedelta (default 600 s)
        now = datetime.datetime.now()
        if now - self.lastRssUpdate > self.rwUpdatePeriod:
            update = self.updateGraph(rwAccess=True)
            if not update["OK"]:
                # not fatal, the previous R/W flags are used
                self.log.warn("replicationTree: unable to update FTS graph: %s" % update["Message"])
            else:
                self.lastRssUpdate = now
        ## get strategy
        strategy = strategy if strategy else self.__selectStrategy()
        if strategy not in self.getSupportedStrategies():
            return S_ERROR("replicationTree: unsupported strategy '%s'" % strategy)

        self.log.info("replicationTree: strategy=%s sourceSEs=%s targetSEs=%s size=%s" %
                      (strategy, sourceSEs, targetSEs, size))
        ## fire action from dispatcher
        tree = self.strategyDispatcher[strategy](sourceSEs, targetSEs)
        if not tree["OK"]:
            self.log.error("replicationTree: %s" % tree["Message"])
            return tree
        ## update graph edges
        update = self.updateGraph(replicationTree=tree["Value"], size=size)
        if not update["OK"]:
            self.log.error("replicationTree: unable to update FTS graph: %s" % update["Message"])
            return update
        return tree

    def __selectStrategy(self):
        """ If more than one active strategy use one after the other.

        :param self: self reference
        """
        chosenStrategy = self.activeStrategies[self.chosenStrategy]
        self.chosenStrategy += 1
        if self.chosenStrategy == self.numberOfStrategies:
            self.chosenStrategy = 0
        return chosenStrategy

    def __getRWAccessForSE(self, seList):
        """ get RSS R/W for :seList:

        An SE counts as readable / writable when its 'ReadAccess' /
        'WriteAccess' status is 'Active' or 'Degraded'.

        :param list seList: SE list
        :return: S_OK( { se : { "read" : bool, "write" : bool } } ) or the failing RSS result
        """
        allowed = ("Active", "Degraded")

        rAccess = self.resourceStatus.getStorageStatus(seList, statusType="ReadAccess")
        if not rAccess["OK"]:
            return rAccess
        readable = set(se for se, status in rAccess["Value"].items()
                       if status.get("ReadAccess") in allowed)

        wAccess = self.resourceStatus.getStorageStatus(seList, statusType="WriteAccess")
        if not wAccess["OK"]:
            return wAccess
        writable = set(se for se, status in wAccess["Value"].items()
                       if status.get("WriteAccess") in allowed)

        return S_OK(dict((se, {"read": se in readable, "write": se in writable})
                         for se in seList))