def isSameSiteSE(se1, se2):
    """Tell whether two storage elements are attached to the same site.

    The local site of each SE is looked up through ``DMSHelpers``; a lookup
    result without a 'Value' entry counts as "no site".

    :param se1: name of the first storage element
    :param se2: name of the second storage element
    :return: a falsy value when either site is missing, otherwise the outcome
             of comparing the two sites
    """
    helper = DMSHelpers()
    firstSite = helper.getLocalSiteForSE(se1).get('Value')
    secondSite = helper.getLocalSiteForSE(se2).get('Value')
    # Same short-circuit as "a and b and a == b": the first falsy operand is returned
    if not firstSite:
        return firstSite
    if not secondSite:
        return secondSite
    return firstSite == secondSite
def isSameSiteSE(se1, se2):
    """Check whether both storage elements resolve to one and the same site.

    :param se1: name of the first storage element
    :param se2: name of the second storage element
    :return: falsy if the local site of either SE is not available, else
             whether the two local sites are equal
    """
    helper = DMSHelpers()
    # Resolve the local site of each SE, in argument order
    localSites = [helper.getLocalSiteForSE(seName).get('Value') for seName in (se1, se2)]
    siteA, siteB = localSites
    return siteA and siteB and siteA == siteB
def __init__(self, fromDict=None):
    """c'tor

    Sets the default timestamps and status, takes the owner DN/group from the
    current proxy when both are flagged valid, then overlays the content of
    ``fromDict``.

    :param self: self reference
    :param fromDict: if false, new request. Can be json string that represents the object,
                     or the dictionary directly. A dictionary given by the caller is
                     only read: it is not modified by this constructor.
    """
    self.__waiting = None
    now = datetime.datetime.utcnow().replace(microsecond=0)

    self._CreationTime = now
    self._SubmitTime = now
    self._LastUpdate = now
    # the time before which the request should not be executed
    # If None, no delay
    self._NotBefore = now
    self._Status = "Done"
    self.JobID = 0
    self.Error = None
    self.DIRACSetup = None
    self.OwnerDN = None
    self.RequestName = None
    self.OwnerGroup = None
    self._SourceComponent = None

    self.dmsHelper = DMSHelpers()

    proxyInfo = getProxyInfo()
    if proxyInfo["OK"]:
        proxyInfo = proxyInfo["Value"]
        if proxyInfo["validGroup"] and proxyInfo["validDN"]:
            self.OwnerDN = proxyInfo["identity"]
            self.OwnerGroup = proxyInfo["group"]

    self.__operations__ = []

    if isinstance(fromDict, six.string_types):
        fromDict = json.loads(fromDict)
    elif not isinstance(fromDict, dict):
        fromDict = {}

    # Operations are attached one by one through "+=", not stored as a plain attribute
    for opDict in fromDict.get("Operations", []):
        self += Operation(opDict)

    for key, value in fromDict.items():
        # "Operations" has been consumed above. Skipping the key here, rather
        # than deleting it, leaves the caller's dictionary intact.
        if key == "Operations":
            continue

        # The JSON module forces the use of UTF-8, which is not properly
        # taken into account in DIRAC.
        # One would need to replace all the '== str' with 'in six.string_types'
        # This is converting `unicode` to `str` and doesn't make sense in Python 3
        if six.PY2 and isinstance(value, six.string_types):
            value = value.encode()

        # Falsy values never override the defaults set above
        if value:
            setattr(self, key, value)

    self._notify()
def __init__(self, fromDict=None):
    """c'tor

    Sets the default timestamps and status, takes the owner DN/group from the
    current proxy when both are flagged valid, then overlays the content of
    ``fromDict``.

    :param self: self reference
    :param fromDict: if false, new request. Can be json string that represents the object,
                     or the dictionary directly. A dictionary given by the caller is
                     only read: it is not modified by this constructor.
    """
    self.__waiting = None
    now = datetime.datetime.utcnow().replace(microsecond=0)

    self._CreationTime = now
    self._SubmitTime = now
    self._LastUpdate = now
    # the time before which the request should not be executed
    # If None, no delay
    self._NotBefore = now
    self._Status = "Done"
    self.JobID = 0
    self.Error = None
    self.DIRACSetup = None
    self.OwnerDN = None
    self.RequestName = None
    self.OwnerGroup = None
    self._SourceComponent = None

    self.dmsHelper = DMSHelpers()

    proxyInfo = getProxyInfo()
    if proxyInfo["OK"]:
        proxyInfo = proxyInfo["Value"]
        if proxyInfo["validGroup"] and proxyInfo["validDN"]:
            self.OwnerDN = proxyInfo["identity"]
            self.OwnerGroup = proxyInfo["group"]

    self.__operations__ = []

    if isinstance(fromDict, str):
        fromDict = json.loads(fromDict)
    elif not isinstance(fromDict, dict):
        fromDict = {}

    # Operations are attached one by one through "+=", not stored as a plain attribute
    for opDict in fromDict.get("Operations", []):
        self += Operation(opDict)

    for key, value in fromDict.items():
        # "Operations" has been consumed above. Skipping the key here, rather
        # than deleting it, leaves the caller's dictionary intact.
        if key == "Operations":
            continue
        # Falsy values never override the defaults set above
        if value:
            setattr(self, key, value)

    self._notify()
def __getSEsFromOptions(dmScript):
    """Work out the list of storage elements selected by the 'SEs' and 'Sites' options.

    Without any site, the 'SEs' option is returned as is. With sites, the SEs
    of those sites are collected; if both an explicit SE list and site SEs are
    available the result is their intersection (order not preserved),
    otherwise whichever of the two is non-empty.

    :param dmScript: object providing ``getOption(name, default)``
    :return: list of storage element names
    """
    seList = dmScript.getOption('SEs', [])
    sites = dmScript.getOption('Sites', [])
    if not sites:
        return seList

    dmsHelper = DMSHelpers()
    siteSEs = []
    for site in sites:
        # A failed lookup has no 'Value' and contributes nothing
        siteSEs += dmsHelper.getSEsForSite(site).get('Value', [])

    if seList and siteSEs:
        return list(set(seList) & set(siteSEs))
    # Concatenate into a new list: "+=" would extend, in place, the very list
    # object handed out by getOption.
    return seList + siteSEs
def getSESiteMapping(gridName='', withSiteLocalSEMapping=False):
    """Build a dictionary associating every storage element with its site(s).

    Each SE known to ``DMSHelpers`` is passed to ``getSitesForSE``; the 'Value'
    of that call (an empty list when there is none) becomes the entry for the SE.
    Although normally one site exists for a given SE, it is possible over all
    Grid types to have multiple entries.
    Assumes CS structure of: /Resources/Sites/<GRIDNAME>/<SITENAME>

    :param gridName: forwarded to ``getSitesForSE`` to restrict the result to that Grid type
    :param withSiteLocalSEMapping: forwarded to ``getSitesForSE``
    :return: S_OK( { seName : sites } )
    """
    seToSites = {}
    for seName in DMSHelpers().getStorageElements():
        sitesResult = getSitesForSE(seName,
                                    gridName=gridName,
                                    withSiteLocalSEMapping=withSiteLocalSEMapping)
        seToSites[seName] = sitesResult.get('Value', [])
    return S_OK(seToSites)
def initializeHandler(cls, serviceInfoDict):
    """Initialize the handler.

    Registers in ``cls._S3Storages``, for each storage element, the first
    storage plugin whose protocol is "s3" and which carries both AWS key
    parameters, then creates the file catalog client.

    :param serviceInfoDict: service information (not used here)
    :return: S_OK()
    """
    log = LOG.getSubLogger("initializeHandler")
    credentialKeys = ("Aws_access_key_id", "Aws_secret_access_key")

    for seName in DMSHelpers().getStorageElements():
        storageElement = StorageElement(seName)
        # TODO: once we finally merge _allProtocolParameters with the
        # standard paramaters in the StorageBase, this will be much neater
        for plugin in storageElement.storages:
            parameters = plugin._allProtocolParameters  # pylint: disable=protected-access
            if parameters.get("Protocol") != "s3":
                continue
            if not all(key in parameters for key in credentialKeys):
                continue
            cls._S3Storages[seName] = plugin
            log.debug("Add %s to the list of usable S3 storages" % seName)
            # One usable plugin per SE is enough
            break

    log.info("S3Gateway initialized storages", "%s" % list(cls._S3Storages))
    cls._fc = FileCatalog()
    return S_OK()
def doMaster(self):
    """Master method.

    Gathers all sites and all storage elements, then calls ``doNew`` for every
    metric / direction combination over the whole list of names. Failing
    results are collected in ``self.metrics["failed"]``.

    :return: the ``getSites`` result if it failed, otherwise S_OK( self.metrics )
    """
    sitesResult = getSites()
    if not sitesResult["OK"]:
        return sitesResult

    elementNames = sitesResult["Value"] + DMSHelpers().getStorageElements()

    #    sourceQuery = self.rmClient.selectTransferCache( meta = { 'columns' : [ 'SourceName' ] } )
    #    if not sourceQuery[ 'OK' ]:
    #      return sourceQuery
    #    sourceQuery = [ element[0] for element in sourceQuery[ 'Value' ] ]
    #
    #    sourceElementsToQuery = list( set( elementNames ).difference( set( sourceQuery ) ) )
    self.log.info("Processing %s" % ", ".join(elementNames))

    # Metric is the outer loop, direction the inner one
    combinations = [(direction, metric)
                    for metric in ["Quality", "FailedTransfers"]
                    for direction in ["Source", "Destination"]]
    for direction, metric in combinations:
        # 2 hours of window
        outcome = self.doNew((2, elementNames, direction, metric))
        if not outcome["OK"]:
            self.metrics["failed"].append(outcome)

    return S_OK(self.metrics)
def getSEsForSite(siteName, withSiteLocalSEMapping=False):
    """Return the storage elements associated with a DIRAC site name.

    :param siteName: DIRAC site name
    :param withSiteLocalSEMapping: query with connection level 'DOWNLOAD' instead of 'LOCAL'
    :return: the ``DMSHelpers`` result when it succeeded, S_OK( [] ) otherwise
    """
    connectionLevel = 'LOCAL'
    if withSiteLocalSEMapping:
        connectionLevel = 'DOWNLOAD'
    lookup = DMSHelpers().getSEsForSite(siteName, connectionLevel=connectionLevel)
    # A failed lookup is reported as "no SE" rather than as an error
    return lookup if lookup['OK'] else S_OK([])
def __readConf(self):
    """Read the configuration.

    Sets up the FTS3 server policy, the list of third party protocols and the
    numeric agent options.

    :return: the ``getFTS3ServerDict`` result if it failed, S_OK() otherwise
    """
    # Getting all the possible servers
    serversResult = getFTS3ServerDict()
    if not serversResult['OK']:
        gLogger.error(serversResult['Message'])
        return serversResult
    servers = serversResult['Value']

    policyName = opHelper().getValue('DataManagement/FTSPlacement/FTS3/ServerPolicy', 'Random')
    self._serverPolicy = FTS3Utilities.FTS3ServerPolicy(servers, serverPolicy=policyName)

    # List of third party protocols for transfers
    self.thirdPartyProtocols = DMSHelpers().getThirdPartyProtocols()

    # (attribute, agent option, default value), read in this order
    agentOptions = (
        ("maxNumberOfThreads", "MaxThreads", 10),
        # Number of Operation we treat in one loop
        ("operationBulkSize", "OperationBulkSize", 20),
        # Number of Jobs we treat in one loop
        ("jobBulkSize", "JobBulkSize", 20),
        ("maxFilesPerJob", "MaxFilesPerJob", 100),
        ("maxAttemptsPerFile", "MaxAttemptsPerFile", 256),
        ("kickDelay", "KickAssignedHours", 1),
        ("maxKick", "KickLimitPerCycle", 100),
        ("deleteDelay", "DeleteGraceDays", 180),
        ("maxDelete", "DeleteLimitPerCycle", 100),
    )
    for attribute, optionName, default in agentOptions:
        setattr(self, attribute, self.am_getOption(optionName, default))

    return S_OK()
def export_getTree(self, elementType, elementName):
    """Return the status tree of the site owning the given element.

    The parent site is obtained from ``self.getSite``; the tree holds the site
    status types plus the statuses of its computing and storage elements.

    :param elementType: type of the element
    :param elementName: name of the element
    :return: S_OK( { site : { 'statusTypes' : ..., 'ces' : ..., 'ses' : ... } } ),
             S_ERROR('No site'), or the first failing intermediate result
    """

    def groupByName(statusRows):
        """Turn (name, statusType, status) rows into { name : { statusType : status } }."""
        grouped = {}
        for name, statusType, status in statusRows:
            grouped.setdefault(name, {})[statusType] = status
        return grouped

    def selectResources(names):
        """Query the status of the named resources."""
        return rsClient.selectStatusElement('Resource', 'Status', name=names,
                                            meta={'columns': ['Name', 'StatusType', 'Status']})

    gLogger.info('getTree')

    site = self.getSite(elementType, elementName)
    if not site:
        return S_ERROR('No site')

    siteStatus = rsClient.selectStatusElement('Site', 'Status', name=site,
                                              meta={'columns': ['StatusType', 'Status']})
    if not siteStatus['OK']:
        return siteStatus
    tree = {site: {'statusTypes': dict(siteStatus['Value'])}}

    cesStatus = selectResources(CSHelpers.getSiteComputingElements(site))
    if not cesStatus['OK']:
        return cesStatus

    seMapping = DMSHelpers().getSiteSEMapping()
    if not seMapping['OK']:
        return seMapping
    # Element [1] of the mapping is indexed by site name
    sesStatus = selectResources(seMapping['Value'][1].get(site, []))
    if not sesStatus['OK']:
        return sesStatus

    tree[site]['ces'] = groupByName(cesStatus['Value'])
    tree[site]['ses'] = groupByName(sesStatus['Value'])
    return S_OK(tree)
def _cleanCommand(self, toDelete=None):
    """Clean the spaceTokenOccupancy table from old endpoints.

    When ``toDelete`` is given, only that entry is removed. Otherwise every
    cached (endpoint, SE) pair that does not correspond to a current storage
    element is removed.

    :param tuple toDelete: endpoint to remove (endpoint, storage_element_name),
                           e.g. ('httpg://srm-lhcb.cern.ch:8443/srm/managerv2', CERN-RAW)
    :return: S_OK(), or the failing result of the cache selection
    """
    if toDelete:
        obsolete = [toDelete]
    else:
        cached = self.rmClient.selectSpaceTokenOccupancyCache()
        if not cached['OK']:
            return cached
        storedPairs = set((row[0], row[1]) for row in cached['Value'])

        currentPairs = set()
        for seName in DMSHelpers().getStorageElements():
            endpointResult = CSHelpers.getStorageElementEndpoint(seName)
            if not endpointResult['OK']:
                self.log.warn("Could not get endpoint", endpointResult['Message'])
                continue
            currentPairs.add((endpointResult['Value'][0], seName))

        obsolete = list(storedPairs - currentPairs)

    for entry in obsolete:
        deletion = self.rmClient.deleteSpaceTokenOccupancyCache(entry[0], entry[1])
        if not deletion['OK']:
            # Best effort: a failed deletion is reported and the next one is tried
            self.log.warn("Could not delete entry from SpaceTokenOccupancyCache", deletion['Message'])

    return S_OK()
def getStorageElementsHosts(seNames=None):
    """Get StorageElement host names.

    Storage elements whose hosts cannot be obtained (failed result or
    exception) are reported through the logger and skipped.

    :param list seNames: possible list of storage element names (if not provided, will use all)

    :return: S_OK() with the list of distinct hosts
    """
    if seNames is None:
        seNames = DMSHelpers().getStorageElements()

    collectedHosts = []
    for seName in seNames:
        try:
            hostsResult = getSEHosts(seName)
            if hostsResult["OK"]:
                if hostsResult["Value"]:
                    collectedHosts.extend(hostsResult["Value"])
            else:
                gLogger.warn("Could not get SE Host", "SE: %s" % seName)
        except Exception as excp:  # pylint: disable=broad-except
            gLogger.error("Failed to get SE %s information (SE skipped) " % seName)
            gLogger.exception("Operation finished with exception: ", lException=excp)

    # Going through a set drops duplicated hosts
    return S_OK(list(set(collectedHosts)))
def printSEInfo(voName):
    """Print a table of storage elements with their access status and protocols.

    A storage element is skipped unless its ``vo`` lists ``voName`` or its
    ``voName`` attribute is falsy. The status column lists which of "Write" and
    "Read" are enabled, or "InActive" when neither is.

    :param str voName: name of the virtual organisation
    :return: S_OK()
    """
    # One header per column actually filled in below. The previous fourth
    # header ("Aliases") had no matching value in the rows.
    fields = ("SE", "Status", "Protocols")
    records = []

    # this will get the full list of SEs, not only the vo's ones.
    for se in DMSHelpers(voName).getStorageElements():
        seObject = StorageElement(se)
        # NOTE(review): the last operand reads "voName" while the first ones read "vo" - confirm
        if not (seObject.vo and voName in seObject.vo.strip().split(",") or not seObject.voName):
            continue

        result = seObject.status()
        enabledAccess = [statusType for statusType in ["Write", "Read"] if result[statusType]]
        status = "/".join(enabledAccess) if enabledAccess else "InActive"

        protocols = ",".join([seProtocol["Protocol"] for seProtocol in seObject.protocolOptions])
        records.append((se, status, protocols))

    gLogger.notice(printTable(fields, records, printOut=False, columnSeparator=" "))
    return S_OK()
def __init__(self, vo=None):
    """Constructor.

    The plugin is instantiated once per ``FTS3Operation``, so this is a good
    place to do global initialization: the third party protocols are read once
    and kept on the instance.

    :param str vo: Virtual Organization
    """
    self.vo = vo
    helper = DMSHelpers(vo=vo)
    self.thirdPartyProtocols = helper.getThirdPartyProtocols()
def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
             debug=False, transInThread=None, transID=None):
    """Constructor: set the defaults.

    Every client that is not supplied is instantiated here.

    :param plugin: plugin name, used as prefix of the sub-logger name
    :param transClient: transformation client (default: new ``TransformationClient``)
    :param dataManager: data manager (default: new ``DataManager``)
    :param fc: file catalog (default: new ``FileCatalog``)
    :param debug: debug flag, stored as is
    :param transInThread: dictionary queried with ``transID`` to build the logger name
    :param transID: transformation ID
    """
    # clients
    self.transClient = TransformationClient() if transClient is None else transClient
    self.dm = DataManager() if dataManager is None else dataManager
    self.fc = FileCatalog() if fc is None else fc
    self.dmsHelper = DMSHelpers()

    self.plugin = plugin
    self.transID = transID
    self.params = {}
    self.groupSize = 0
    self.maxFiles = 0
    self.cachedLFNSize = {}
    self.transString = ''
    self.debug = debug
    self.transInThread = {} if transInThread is None else transInThread

    # Fall back on a "[NoThread]" tag when the transformation has no entry
    threadTag = self.transInThread.get(self.transID, ' [NoThread] [%s] ' % self.transID)
    self.log = gLogger.getSubLogger(self.plugin + threadTag)
    # FIXME: This doesn't work (yet) but should soon, will allow scripts to get the context
    self.log.showHeaders(True)
def __init__(self, fromDict=None):
    """c'tor

    Sets the default timestamps and status, takes the owner DN/group from the
    current proxy when both are flagged valid, then overlays the content of
    ``fromDict``.

    :param self: self reference
    :param fromDict : if false, new request. Can be json string that represents the object,
                      or the dictionary directly. A dictionary given by the caller is
                      only read: it is not modified by this constructor.
    """
    self.__waiting = None
    now = datetime.datetime.utcnow().replace(microsecond=0)

    self._CreationTime = now
    self._SubmitTime = now
    self._LastUpdate = now
    # the time before which the request should not be executed
    # If None, no delay
    self._NotBefore = now
    self._Status = "Done"
    self.JobID = 0
    self.Error = None
    self.DIRACSetup = None
    self.OwnerDN = None
    self.RequestName = None
    self.OwnerGroup = None
    self.SourceComponent = None

    self.dmsHelper = DMSHelpers()

    proxyInfo = getProxyInfo()
    if proxyInfo["OK"]:
        proxyInfo = proxyInfo["Value"]
        if proxyInfo["validGroup"] and proxyInfo["validDN"]:
            self.OwnerDN = proxyInfo["identity"]
            self.OwnerGroup = proxyInfo["group"]

    self.__operations__ = []

    # Dictionaries are used directly, strings are decoded as JSON, anything else is ignored
    if isinstance(fromDict, dict):
        pass
    elif isinstance(fromDict, StringTypes):
        fromDict = json.loads(fromDict)
    else:
        fromDict = {}

    # Operations are attached one by one through "+=", not stored as a plain attribute
    for opDict in fromDict.get("Operations", []):
        self += Operation(opDict)

    for key, value in fromDict.items():
        # "Operations" has been consumed above. Skipping the key here, rather
        # than deleting it, leaves the caller's dictionary intact.
        if key == "Operations":
            continue

        # The JSON module forces the use of UTF-8, which is not properly
        # taken into account in DIRAC.
        # One would need to replace all the '== str' with 'in StringTypes'
        if type(value) in StringTypes:
            value = value.encode()

        # Falsy values never override the defaults set above
        if value:
            setattr(self, key, value)

    self._notify()
def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
             debug=False, transInThread=None, transID=None):
    """Constructor: set the defaults.

    Every client that is not supplied is instantiated here.

    :param plugin: plugin name, also used in the sub-logger name
    :param transClient: transformation client (default: new ``TransformationClient``)
    :param dataManager: data manager (default: new ``DataManager``)
    :param fc: file catalog (default: new ``FileCatalog``)
    :param debug: debug flag, stored as is
    :param transInThread: stored as is, an empty dictionary when not given
    :param transID: transformation ID
    """
    # clients
    self.transClient = TransformationClient() if transClient is None else transClient
    self.dm = DataManager() if dataManager is None else dataManager
    self.fc = FileCatalog() if fc is None else fc
    self.dmsHelper = DMSHelpers()

    # plugin state
    self.plugin = plugin
    self.transID = transID
    self.params = {}
    self.groupSize = 0
    self.maxFiles = 0
    self.cachedLFNSize = {}
    self.transString = ''
    self.debug = debug
    self.seConfig = {}
    self.transInThread = {} if transInThread is None else transInThread

    loggerName = "%s/PluginUtilities" % plugin
    self.log = gLogger.getSubLogger(loggerName)
def getSitesForSE(storageElement, gridName='', withSiteLocalSEMapping=False):
    """Return the sites associated with a DIRAC storage element name.

    :param storageElement: DIRAC SE name
    :param gridName: when set, keep only the sites whose grid name (``siteGridName``) matches
    :param withSiteLocalSEMapping: query with connection level 'DOWNLOAD' instead of 'LOCAL'
    :return: the ``DMSHelpers`` result, unfiltered when it failed or when no
             grid is requested, else S_OK( filtered site list )
    """
    connectionLevel = 'DOWNLOAD' if withSiteLocalSEMapping else 'LOCAL'
    sitesResult = DMSHelpers().getSitesForSE(storageElement, connectionLevel=connectionLevel)

    if sitesResult['OK'] and gridName:
        matchingSites = []
        for site in sitesResult['Value']:
            if siteGridName(site) == gridName:
                matchingSites.append(site)
        return S_OK(matchingSites)

    return sitesResult
def _storeCommand(self, results):
    """Store the results in the cache (SpaceTokenOccupancyCache), and add
    records to the StorageOccupancy accounting.

    A 'Used' entry (Total - Free) is added to ``results`` along the way.

    :param dict results: something like
                         {'ElementName': 'CERN-HIST-EOS',
                          'Endpoint': 'httpg://srm-eoslhcb-bis.cern.ch:8443/srm/v2/server',
                          'Free': 3264963586.10073,
                          'Total': 8000000000.0}
    :returns: S_OK/S_ERROR dict
    """
    # Stores in cache
    cacheResult = self.rmClient.addOrModifySpaceTokenOccupancyCache(
        endpoint=results["Endpoint"],
        lastCheckTime=datetime.utcnow(),
        free=results["Free"],
        total=results["Total"],
        token=results["ElementName"],
    )
    if not cacheResult["OK"]:
        self.log.error("Error calling addOrModifySpaceTokenOccupancyCache", cacheResult["Message"])
        return cacheResult

    # Now proceed with the accounting
    siteRes = DMSHelpers().getLocalSiteForSE(results["ElementName"])
    if not siteRes["OK"]:
        return siteRes

    accountingDict = {
        "StorageElement": results["ElementName"],
        "Endpoint": results["Endpoint"],
        # SEs without a local site are accounted under "unassigned"
        "Site": siteRes["Value"] or "unassigned",
    }

    results["Used"] = results["Total"] - results["Free"]

    # One accounting record per space type
    for spaceType in ("Total", "Free", "Used"):
        record = StorageOccupancy()
        record.setNowAsStartAndEndTime()
        record.setValuesFromDict(accountingDict)
        record.setValueByKey("SpaceType", spaceType)
        sizeInBytes = int(convertSizeUnits(results[spaceType], "MB", "B"))
        record.setValueByKey("Space", sizeInBytes)

        registered = gDataStoreClient.addRegister(record)
        if not registered["OK"]:
            self.log.warn("Could not commit register", registered["Message"])

    return gDataStoreClient.commit()
def _storeCommand(self, results):
    """Store the results in the cache (SpaceTokenOccupancyCache), and add
    records to the StorageOccupancy accounting.

    A 'Used' entry (Total - Free) is added to ``results`` along the way.

    :param dict results: something like
                         {'ElementName': 'CERN-HIST-EOS',
                          'Endpoint': 'httpg://srm-eoslhcb-bis.cern.ch:8443/srm/v2/server',
                          'Free': 3264963586.10073,
                          'Total': 8000000000.0,
                          'SpaceReservation': 'LHCb-Disk'}
    :returns: S_OK/S_ERROR dict
    """
    # Stores in cache
    cacheResult = self.rmClient.addOrModifySpaceTokenOccupancyCache(
        endpoint=results['Endpoint'],
        lastCheckTime=datetime.utcnow(),
        free=results['Free'],
        total=results['Total'],
        token=results['ElementName'])
    if not cacheResult['OK']:
        self.log.error("Error calling addOrModifySpaceTokenOccupancyCache", cacheResult['Message'])
        return cacheResult

    # Now proceed with the accounting
    siteRes = DMSHelpers().getLocalSiteForSE(results['ElementName'])
    if not siteRes['OK']:
        return siteRes

    # SEs without a local site are accounted under 'unassigned'
    siteName = siteRes['Value'] or 'unassigned'
    accountingDict = {'StorageElement': results['ElementName'],
                      'Endpoint': results['Endpoint'],
                      'Site': siteName}

    results['Used'] = results['Total'] - results['Free']

    # One accounting record per space type
    for spaceType in ('Total', 'Free', 'Used'):
        record = StorageOccupancy()
        record.setNowAsStartAndEndTime()
        record.setValuesFromDict(accountingDict)
        record.setValueByKey('SpaceType', spaceType)
        record.setValueByKey('Space', int(convertSizeUnits(results[spaceType], 'MB', 'B')))

        registered = gDataStoreClient.addRegister(record)
        if not registered['OK']:
            self.log.warn("Could not commit register", registered['Message'])

    return gDataStoreClient.commit()
def export_getTree(self, elementType, elementName):
    """Given an element type and name, find its parent site and return all
    descendants of that site.

    :param elementType: type of the element
    :param elementName: name of the element
    :return: S_OK( { site : { 'statusTypes' : ..., 'ces' : ..., 'ses' : ... } } ),
             S_ERROR('No site'), or the first failing intermediate result
    """
    site = self.getSite(elementType, elementName)
    if not site:
        return S_ERROR('No site')

    siteStatus = rsClient.selectStatusElement('Site', 'Status', name=site,
                                              meta={'columns': ['StatusType', 'Status']})
    if not siteStatus['OK']:
        return siteStatus

    tree = {site: {'statusTypes': dict(siteStatus['Value'])}}

    result = getSiteCEMapping()
    if not result['OK']:
        return result
    ces = result['Value'][site]

    cesStatus = rsClient.selectStatusElement('Resource', 'Status', name=ces,
                                             meta={'columns': ['Name', 'StatusType', 'Status']})
    if not cesStatus['OK']:
        return cesStatus

    res = DMSHelpers().getSiteSEMapping()
    if not res['OK']:
        self.log.error('Could not get site to SE mapping', res['Message'])
        # Propagate the failure like every other step of this method does;
        # an S_OK() without a tree would look like a success to the caller.
        return res
    # Element [1] of the mapping is indexed by site name
    ses = res['Value'][1].get(site, [])

    sesStatus = rsClient.selectStatusElement('Resource', 'Status', name=list(ses),
                                             meta={'columns': ['Name', 'StatusType', 'Status']})
    if not sesStatus['OK']:
        return sesStatus

    def feedTree(elementsList):
        """Turn the (name, statusType, status) rows of a result into { name : { statusType : status } }."""
        elements = {}
        for name, statusType, status in elementsList['Value']:
            elements.setdefault(name, {})[statusType] = status
        return elements

    tree[site]['ces'] = feedTree(cesStatus)
    tree[site]['ses'] = feedTree(sesStatus)

    return S_OK(tree)
def __init__(self, argumentsDict):
    """Standard constructor.

    :param dict argumentsDict: must provide "InputData", "Configuration" and
                               "FileCatalog"; "InputDataDirectory" is optional
    """
    self.name = COMPONENT_NAME
    self.log = gLogger.getSubLogger(self.name)

    mandatoryKeys = ("InputData", "Configuration", "FileCatalog")
    # Warning: the "FileCatalog" entry contains not only the SEs but also the file metadata
    self.inputData, self.configuration, self.fileCatalogResult = [argumentsDict[key] for key in mandatoryKeys]

    # By default put each input data file into a separate directory
    self.inputDataDirectory = argumentsDict.get("InputDataDirectory", "PerFile")
    self.jobID = None
    self.counter = 1
    self.availableSEs = DMSHelpers().getStorageElements()
def setSites(self, arg):
    """Setter for the 'Sites' option.

    Each comma-separated item is looked up, upper-cased, among the short site
    names; items without a match are kept unchanged.

    :param str arg: comma-separated list of site names or short names
    :return: DIRAC.S_OK()
    """
    from DIRAC.DataManagementSystem.Utilities.DMSHelpers import DMSHelpers

    try:
        shortNames = DMSHelpers().getShortSiteNames(withStorage=False, tier=(0, 1))
    except AttributeError:
        # Hard-coded list used when getShortSiteNames is not available
        shortNames = dict([
            ('CERN', 'LCG.CERN.cern'),
            ('CNAF', 'LCG.CNAF.it'),
            ('GRIDKA', 'LCG.GRIDKA.de'),
            ('NIKHEF', 'LCG.NIKHEF.nl'),
            ('SARA', 'LCG.SARA.nl'),
            ('PIC', 'LCG.PIC.es'),
            ('RAL', 'LCG.RAL.uk'),
            ('IN2P3', 'LCG.IN2P3.fr'),
            ('RRCKI', 'LCG.RRCKI.ru'),
        ])

    resolvedSites = []
    for requested in arg.split(','):
        resolvedSites.append(shortNames.get(requested.upper(), requested))
    self.options['Sites'] = resolvedSites
    return DIRAC.S_OK()
def __init__(self):
    """Constructor.

    Creates the Bookkeeping and DIRAC clients, sets the data quality flag to
    'OK', prepares the argument parser of the "save" command and loads the
    short site names.
    """
    cmd.Cmd.__init__(self)
    self.prompt = "$[/]$"
    self.bk = LHCB_BKKDBClient()
    self.diracAPI = DiracLHCb()
    self.currentPath = '/'
    self.do_setDataQualityFlags('OK')

    # Parser for the arguments of the "save" command
    parser = argparse.ArgumentParser(description="Save LFNS", prog='save')
    parser.add_argument('filename', type=str, help='file name')
    parser.add_argument("-f", "--format", help="txt or py")
    parser.add_argument("-n", "--num", help="number of files to be saved")
    parser.add_argument("-c", "--with-fileCatalog", help="save POOL XML catalog in a given site")
    self.saveParser = parser

    self.sites = {}
    self.sites = DMSHelpers().getShortSiteNames(withStorage=False, tier=(0, 1))
def getSiteSEMapping(gridName='', withSiteLocalSEMapping=False):
    """Return a dictionary of all sites and their localSEs as a list, e.g.
    {'LCG.CERN.ch':['CERN-RAW','CERN-RDST',...]}

    :param gridName: when set, keep only the sites whose grid name (``siteGridName``) matches
    :param withSiteLocalSEMapping: take element [2] of the ``DMSHelpers`` mapping instead of element [1]
    :return: S_OK( mapping ), or the failing ``DMSHelpers`` result
    """
    mappings = DMSHelpers().getSiteSEMapping()
    if not mappings['OK']:
        return mappings

    siteSEs = mappings['Value'][2 if withSiteLocalSEMapping else 1]

    if gridName:
        selected = {}
        for site in siteSEs:
            if siteGridName(site) == gridName:
                selected[site] = siteSEs[site]
        siteSEs = selected

    return S_OK(siteSEs)
def doMaster(self):
    """Call the doNew method for each storage element that exists in the CS.

    Failures are only logged: the loop carries on with the next storage element.

    :return: S_OK()
    """
    for seName in DMSHelpers().getStorageElements():
        # sizes are requested in 'MB'
        spaceResult = self.doNew((seName, 'MB'))
        if spaceResult['OK']:
            continue
        gLogger.warn("Unable to calculate free/total disk space", "name: %s" % seName)
        gLogger.warn(spaceResult['Message'])

    return S_OK()
def getSiteElements(siteName):
    """Get all the computing and storage elements for a given site.

    :param str siteName: name of the site
    :return: S_OK( storage elements followed by the ``getQueues`` entries of the site ),
             or the first failing lookup result
    """
    seMapping = DMSHelpers().getSiteSEMapping()
    if not seMapping["OK"]:
        return seMapping
    # Element [1] of the mapping is indexed by site name
    storageElements = seMapping["Value"][1].get(siteName, [])

    queues = getQueues(siteName)
    if not queues["OK"]:
        return queues
    computingElements = queues["Value"].get(siteName, [])

    return S_OK(list(storageElements) + list(computingElements))
def __init__(self, requestObject=None, log=None, defaultChecksumType='ADLER32'):
    """Constructor.

    :param requestObject: request to work with; when falsy a new ``Request``
                          named 'noname_request' is created
    :param log: logger to use; when falsy a "FailoverTransfer" sub-logger is created
    :param defaultChecksumType: stored as is
    """
    self.log = log if log else gLogger.getSubLogger("FailoverTransfer")

    if requestObject:
        self.request = requestObject
    else:
        newRequest = Request()
        newRequest.RequestName = 'noname_request'
        newRequest.SourceComponent = 'FailoverTransfer'
        self.request = newRequest

    self.defaultChecksumType = defaultChecksumType
    self.registrationProtocols = DMSHelpers().getRegistrationProtocols()
def export_getSitesResources(self, siteNames):
    """Return a dictionary with SEs and CEs for the given site(s).
    If siteNames is None, all sites are taken into account.

    A site that is absent from a mapping gets an empty list for that kind of
    resource. If the site/SE mapping cannot be obtained, the error is logged
    and the sites are returned with their 'ces' only.

    :param siteNames: a site name, a list of site names, or None for all sites
    :return: S_OK( { site1 : { ces : [ ces ], 'ses' : [ ses ] },... } ) | S_ERROR
    """
    if siteNames is None:
        res = getSites()
        if not res["OK"]:
            self.log.error("Error getting sites", res["Message"])
            return res
        siteNames = res["Value"]

    if isinstance(siteNames, six.string_types):
        siteNames = [siteNames]

    sitesRes = {}
    if not siteNames:
        # Nothing to resolve: no need to query the mappings
        return S_OK(sitesRes)

    # Neither mapping depends on the site being processed: look them up once
    ceMapping = getSiteCEMapping()
    if not ceMapping["OK"]:
        self.log.error("Error getting sites/CEs mapping", ceMapping["Message"])
        return ceMapping

    seMapping = DMSHelpers().getSiteSEMapping()
    if not seMapping["OK"]:
        self.log.error("Error getting sites/SEs mapping", seMapping["Message"])

    for siteName in siteNames:
        # A site without CE is absent from the mapping: report an empty list,
        # as is already done for the storage elements
        res = {"ces": ceMapping["Value"].get(siteName, [])}

        if seMapping["OK"]:
            # Convert StorageElements to host names
            ses = seMapping["Value"][1].get(siteName, [])
            result = getStorageElementsHosts(ses)
            if not result["OK"]:
                self.log.error("Error getting storage element hosts", result["Message"])
                return result
            # Remove duplicates
            res["ses"] = list(set(result["Value"]))

        sitesRes[siteName] = res

    return S_OK(sitesRes)
def getStorageElementEndpoints(storageElements=None):
    """Get the endpoints of the storage elements.

    Storage elements whose endpoint lookup fails are silently left out.

    :param storageElements: names of the storage elements (default: all the ones
                            known to ``DMSHelpers``)
    :return: S_OK( list of distinct endpoint values )
    """
    if storageElements is None:
        storageElements = DMSHelpers().getStorageElements()

    # Lazily evaluated: one lookup per storage element, in order
    lookups = (getStorageElementEndpoint(seName) for seName in storageElements)
    endpoints = [lookup['Value'] for lookup in lookups if lookup['OK']]

    # NOTE(review): going through a set assumes each endpoint value is hashable - confirm
    return S_OK(list(set(endpoints)))
def _storeCommand(self, results):
    """Add SpaceToken records to the accounting, on top of what the parent
    class ``_storeCommand`` does.

    Nothing is added when the storage element has no local site.

    :param dict results: something like
                         {'ElementName': 'CERN-HIST-EOS',
                          'Endpoint': 'httpg://srm-eoslhcb-bis.cern.ch:8443/srm/v2/server',
                          'Free': 3264963586.10073,
                          'Total': 8000000000.0,
                          'SpaceReservation': 'LHCb-Disk'}
    :returns: S_OK/S_ERROR dict
    """
    res = super(FreeDiskSpaceCommand, self)._storeCommand(results)
    if not res['OK']:
        return res

    siteRes = DMSHelpers().getLocalSiteForSE(results['ElementName'])
    if not siteRes['OK']:
        return siteRes
    if not siteRes['Value']:
        return S_OK()

    spaceReservation = results.get('SpaceReservation')

    accountingDict = {
        'SpaceToken': spaceReservation,
        'Endpoint': results['Endpoint'],
        'Site': siteRes['Value']
    }

    results['Used'] = results['Total'] - results['Free']

    # One accounting record per space type
    for sType in ['Total', 'Free', 'Used']:
        spaceTokenAccounting = SpaceToken()
        spaceTokenAccounting.setNowAsStartAndEndTime()
        spaceTokenAccounting.setValuesFromDict(accountingDict)
        spaceTokenAccounting.setValueByKey('SpaceType', sType)
        spaceTokenAccounting.setValueByKey('Space', int(convertSizeUnits(results[sType], 'MB', 'B')))

        res = gDataStoreClient.addRegister(spaceTokenAccounting)
        if not res['OK']:
            # Report the record that could not be registered instead of dropping it silently
            self.log.warn("Could not commit register", res['Message'])

    # Hand the outcome of the commit back to the caller rather than always claiming success
    return gDataStoreClient.commit()
def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
             debug=False, transInThread=None, transID=None):
    """Constructor: set the defaults.

    Clients that are not passed in are created here.

    :param plugin: plugin name, also used in the sub-logger name
    :param transClient: transformation client (default: new ``TransformationClient``)
    :param dataManager: data manager (default: new ``DataManager``)
    :param fc: file catalog (default: new ``FileCatalog``)
    :param debug: debug flag, stored as is
    :param transInThread: stored as is, an empty dictionary when not given
    :param transID: transformation ID
    """
    # clients
    if transClient is not None:
        self.transClient = transClient
    else:
        self.transClient = TransformationClient()

    if dataManager is not None:
        self.dm = dataManager
    else:
        self.dm = DataManager()

    if fc is not None:
        self.fc = fc
    else:
        self.fc = FileCatalog()

    self.dmsHelper = DMSHelpers()

    # plugin state
    self.plugin = plugin
    self.transID = transID
    self.params = {}
    self.groupSize = 0
    self.maxFiles = 0
    self.cachedLFNSize = {}
    self.transString = ''
    self.debug = debug
    self.seConfig = {}

    if transInThread is not None:
        self.transInThread = transInThread
    else:
        self.transInThread = {}

    self.log = gLogger.getSubLogger("%s/PluginUtilities" % plugin)
class StorageElementItem(object): """ .. class:: StorageElement common interface to the grid storage element self.name is the resolved name of the StorageElement i.e CERN-tape self.options is dictionary containing the general options defined in the CS e.g. self.options['Backend] = 'Castor2' self.storages is a list of the stub objects created by StorageFactory for the protocols found in the CS. self.localPlugins is a list of the local protocols that were created by StorageFactory self.remotePlugins is a list of the remote protocols that were created by StorageFactory self.protocolOptions is a list of dictionaries containing the options found in the CS. (should be removed) dynamic method:: retransferOnlineFile( lfn ) exists( lfn ) isFile( lfn ) getFile( lfn, localPath = False ) putFile( lfnLocal, sourceSize = 0 ) : {lfn:local} replicateFile( lfn, sourceSize = 0 ) getFileMetadata( lfn ) getFileSize( lfn ) removeFile( lfn ) prestageFile( lfn, lifetime = 86400 ) prestageFileStatus( lfn ) pinFile( lfn, lifetime = 60 * 60 * 24 ) releaseFile( lfn ) isDirectory( lfn ) getDirectoryMetadata( lfn ) getDirectorySize( lfn ) listDirectory( lfn ) removeDirectory( lfn, recursive = False ) createDirectory( lfn ) putDirectory( lfn ) getDirectory( lfn, localPath = False ) """ __deprecatedArguments = ["singleFile", "singleDirectory"] # Arguments that are now useless # Some methods have a different name in the StorageElement and the plugins... # We could avoid this static list in the __getattr__ by checking the storage plugin and so on # but fine... 
let's not be too smart, otherwise it becomes unreadable :-) __equivalentMethodNames = {"exists": "exists", "isFile": "isFile", "getFile": "getFile", "putFile": "putFile", "replicateFile": "putFile", "getFileMetadata": "getFileMetadata", "getFileSize": "getFileSize", "removeFile": "removeFile", "prestageFile": "prestageFile", "prestageFileStatus": "prestageFileStatus", "pinFile": "pinFile", "releaseFile": "releaseFile", "isDirectory": "isDirectory", "getDirectoryMetadata": "getDirectoryMetadata", "getDirectorySize": "getDirectorySize", "listDirectory": "listDirectory", "removeDirectory": "removeDirectory", "createDirectory": "createDirectory", "putDirectory": "putDirectory", "getDirectory": "getDirectory"} # We can set default argument in the __executeFunction which impacts all plugins __defaultsArguments = {"putFile": {"sourceSize": 0}, "getFile": {"localPath": False}, "prestageFile": {"lifetime": 86400}, "pinFile": {"lifetime": 60 * 60 * 24}, "removeDirectory": {"recursive": False}, "getDirectory": {"localPath": False}} def __init__(self, name, plugins=None, vo=None, hideExceptions=False): """ c'tor :param str name: SE name :param list plugins: requested storage plugins :param vo: vo """ self.methodName = None if vo: self.vo = vo else: result = getVOfromProxyGroup() if not result['OK']: return self.vo = result['Value'] self.opHelper = Operations(vo=self.vo) # These things will soon have to go as well. 'AccessProtocol.1' is all but flexible. 
proxiedProtocols = gConfig.getValue('/LocalSite/StorageElements/ProxyProtocols', "").split(',') self.useProxy = ( gConfig.getValue( "/Resources/StorageElements/%s/AccessProtocol.1/Protocol" % name, "UnknownProtocol") in proxiedProtocols) if not self.useProxy: self.useProxy = gConfig.getValue('/LocalSite/StorageElements/%s/UseProxy' % name, False) if not self.useProxy: self.useProxy = self.opHelper.getValue('/Services/StorageElements/%s/UseProxy' % name, False) self.valid = True if plugins is None: res = StorageFactory( useProxy=self.useProxy, vo=self.vo).getStorages( name, pluginList=[], hideExceptions=hideExceptions) else: res = StorageFactory( useProxy=self.useProxy, vo=self.vo).getStorages( name, pluginList=plugins, hideExceptions=hideExceptions) if not res['OK']: self.valid = False self.name = name self.errorReason = res['Message'] else: factoryDict = res['Value'] self.name = factoryDict['StorageName'] self.options = factoryDict['StorageOptions'] self.localPlugins = factoryDict['LocalPlugins'] self.remotePlugins = factoryDict['RemotePlugins'] self.storages = factoryDict['StorageObjects'] self.protocolOptions = factoryDict['ProtocolOptions'] self.turlProtocols = factoryDict['TurlProtocols'] for storage in self.storages: storage.setStorageElement(self) self.log = gLogger.getSubLogger("SE[%s]" % self.name) if self.valid: self.useCatalogURL = gConfig.getValue( '/Resources/StorageElements/%s/UseCatalogURL' % self.name, False) self.log.debug("useCatalogURL: %s" % self.useCatalogURL) self.__dmsHelper = DMSHelpers(vo=vo) # Allow SE to overwrite general operation config accessProto = self.options.get('AccessProtocols') self.localAccessProtocolList = accessProto if accessProto else self.__dmsHelper.getAccessProtocols() self.log.debug("localAccessProtocolList %s" % self.localAccessProtocolList) writeProto = self.options.get('WriteProtocols') self.localWriteProtocolList = writeProto if writeProto else self.__dmsHelper.getWriteProtocols() 
self.log.debug("localWriteProtocolList %s" % self.localWriteProtocolList) # 'getTransportURL', self.readMethods = ['getFile', 'prestageFile', 'prestageFileStatus', 'getDirectory'] self.writeMethods = ['retransferOnlineFile', 'putFile', 'replicateFile', 'pinFile', 'releaseFile', 'createDirectory', 'putDirectory'] self.removeMethods = ['removeFile', 'removeDirectory'] self.checkMethods = ['exists', 'getDirectoryMetadata', 'getDirectorySize', 'getFileSize', 'getFileMetadata', 'listDirectory', 'isDirectory', 'isFile', 'getOccupancy' ] self.okMethods = ['getLocalProtocols', 'getProtocols', 'getRemoteProtocols', 'storageElementName', 'getStorageParameters', 'getTransportURL', 'isLocalSE'] self.__fileCatalog = None def dump(self): """ Dump to the logger a summary of the StorageElement items. """ log = self.log.getSubLogger('dump', True) log.verbose("Preparing dump for StorageElement %s." % self.name) if not self.valid: log.debug("Failed to create StorageElement plugins.", self.errorReason) return i = 1 outStr = "\n\n============ Options ============\n" for key in sorted(self.options): outStr = "%s%s: %s\n" % (outStr, key.ljust(15), self.options[key]) for storage in self.storages: outStr = "%s============Protocol %s ============\n" % (outStr, i) storageParameters = storage.getParameters() for key in sorted(storageParameters): outStr = "%s%s: %s\n" % (outStr, key.ljust(15), storageParameters[key]) i = i + 1 log.verbose(outStr) ################################################################################################# # # These are the basic get functions for storage configuration # def getStorageElementName(self): """ SE name getter for backward compatibility """ return S_OK(self.storageElementName()) def storageElementName(self): """ SE name getter """ self.log.getSubLogger('storageElementName').verbose( "The Storage Element name is %s." 
% self.name) return self.name def getChecksumType(self): """ Checksum type getter for backward compatibility """ return S_OK(self.checksumType()) def checksumType(self): """ get specific /Resources/StorageElements/<SEName>/ChecksumType option if defined, otherwise global /Resources/StorageElements/ChecksumType """ self.log.getSubLogger('checksumType').verbose("get checksum type for %s." % self.name) return self.options["ChecksumType"].upper() if "ChecksumType" in self.options else gConfig.getValue( "/Resources/StorageElements/ChecksumType", "ADLER32").upper() def getStatus(self): """ Return Status of the SE only if the SE is valid It returns an S_OK/S_ERROR structure """ valid = self.isValid() if not valid['OK']: return valid return S_OK(self.status()) def isSameSE(self, otherSE): """ Compares two SE together and tries to guess if the two SEs are pointing at the same location from the namespace point of view. This is primarily aimed at avoiding to overwrite a file with itself, in particular where the difference is only the SRM spacetoken. Two SEs are considered to be the same if they have a couple (Host, Path) in common among their various protocols :param otherSE: the storage element to which we compare :returns: boolean. True if the two SEs are the same. """ # If the two objects are the same, it is obviously the same SE if self == otherSE: return True # Otherwise, we build the list of (Host, Path) couples selfEndpoints = set() otherSEEndpoints = set() for storage in self.storages: storageParam = storage.getParameters() selfEndpoints.add((storageParam['Host'], storageParam['Path'])) for storage in otherSE.storages: storageParam = storage.getParameters() otherSEEndpoints.add((storageParam['Host'], storageParam['Path'])) # The two SEs are the same if they have at least one couple in common return bool(selfEndpoints & otherSEEndpoints) def getOccupancy(self, unit='MB', **kwargs): """ Retrieves the space information about the storage. 
It returns the Total and Free space. It loops over the different Storage Plugins to query it. :params occupancyLFN: (named param) LFN where to find the space reporting json file on the storage The json file should contain the Free and Total space in MB. If not specified, the default path will be </vo/occupancy.json> :returns: S_OK with dict (keys: Total, Free) """ log = self.log.getSubLogger('getOccupancy', True) # Mandatory parameters mandatoryParams = set(['Total', 'Free']) if 'occupancyLFN' not in kwargs: occupancyLFN = self.options.get('OccupancyLFN') if not occupancyLFN: occupancyLFN = os.path.join('/', self.vo, DEFAULT_OCCUPANCY_FILE) kwargs['occupancyLFN'] = occupancyLFN filteredPlugins = self.__filterPlugins('getOccupancy') if not filteredPlugins: return S_ERROR(errno.EPROTONOSUPPORT, "No storage plugins to query the occupancy") # Try all of the storages one by one for storage in filteredPlugins: # The result of the plugin is always in MB res = storage.getOccupancy(**kwargs) if res['OK']: occupancyDict = res['Value'] # Make sure all the mandatory parameters are present if set(occupancyDict) & mandatoryParams != mandatoryParams: log.verbose("Missing mandatory parameters", mandatoryParams - set(occupancyDict)) continue if unit != 'MB': for space in ['Total', 'Free']: convertedSpace = convertSizeUnits(occupancyDict[space], 'MB', unit) # If we have a conversion error, we go to the next plugin if convertedSpace == -sys.maxsize: log.verbose( "Error converting %s space from MB to %s: %s" % (space, unit, occupancyDict[space])) break occupancyDict[space] = convertedSpace return res return S_ERROR("Could not retrieve the occupancy from any plugin") def status(self): """ Return Status of the SE, a dictionary with: * Read: True (is allowed), False (it is not allowed) * Write: True (is allowed), False (it is not allowed) * Remove: True (is allowed), False (it is not allowed) * Check: True (is allowed), False (it is not allowed). .. 
note:: Check is always allowed IF Read is allowed (regardless of what set in the Check option of the configuration) * DiskSE: True if TXDY with Y > 0 (defaults to True) * TapeSE: True if TXDY with X > 0 (defaults to False) * TotalCapacityTB: float (-1 if not defined) * DiskCacheTB: float (-1 if not defined) It returns directly the dictionary """ self.log.getSubLogger('getStatus').verbose("determining status of %s." % self.name) retDict = {} if not self.valid: retDict['Read'] = False retDict['Write'] = False retDict['Remove'] = False retDict['Check'] = False retDict['DiskSE'] = False retDict['TapeSE'] = False retDict['TotalCapacityTB'] = -1 retDict['DiskCacheTB'] = -1 return retDict # If nothing is defined in the CS Access is allowed # If something is defined, then it must be set to Active retDict['Read'] = not ( 'ReadAccess' in self.options and self.options['ReadAccess'] not in ( 'Active', 'Degraded')) retDict['Write'] = not ( 'WriteAccess' in self.options and self.options['WriteAccess'] not in ( 'Active', 'Degraded')) retDict['Remove'] = not ( 'RemoveAccess' in self.options and self.options['RemoveAccess'] not in ( 'Active', 'Degraded')) if retDict['Read']: retDict['Check'] = True else: retDict['Check'] = not ( 'CheckAccess' in self.options and self.options['CheckAccess'] not in ( 'Active', 'Degraded')) diskSE = True tapeSE = False if 'SEType' in self.options: # Type should follow the convention TXDY seType = self.options['SEType'] diskSE = re.search('D[1-9]', seType) is not None tapeSE = re.search('T[1-9]', seType) is not None retDict['DiskSE'] = diskSE retDict['TapeSE'] = tapeSE try: retDict['TotalCapacityTB'] = float(self.options['TotalCapacityTB']) except Exception: retDict['TotalCapacityTB'] = -1 try: retDict['DiskCacheTB'] = float(self.options['DiskCacheTB']) except Exception: retDict['DiskCacheTB'] = -1 return retDict def isValid(self, operation=None): """ check CS/RSS statuses for :operation: :param str operation: operation name """ log = 
self.log.getSubLogger('isValid', True) log.verbose("Determining if the StorageElement %s is valid for VO %s" % (self.name, self.vo)) if not self.valid: log.debug("Failed to create StorageElement plugins.", self.errorReason) return S_ERROR("SE.isValid: Failed to create StorageElement plugins: %s" % self.errorReason) # Check if the Storage Element is eligible for the user's VO if 'VO' in self.options and self.vo not in self.options['VO']: log.debug("StorageElement is not allowed for VO", self.vo) return S_ERROR(errno.EACCES, "StorageElement.isValid: StorageElement is not allowed for VO") log.verbose( "Determining if the StorageElement %s is valid for operation '%s'" % (self.name, operation)) if (not operation) or (operation in self.okMethods): return S_OK() # Determine whether the StorageElement is valid for checking, reading, writing status = self.status() checking = status['Check'] reading = status['Read'] writing = status['Write'] removing = status['Remove'] # Determine whether the requested operation can be fulfilled if (not operation) and (not reading) and (not writing) and (not checking): log.debug("Read, write and check access not permitted.") return S_ERROR(errno.EACCES, "SE.isValid: Read, write and check access not permitted.") # The supplied operation can be 'Read','Write' or any of the possible StorageElement methods. 
if (operation in self.readMethods) or (operation.lower() in ('read', 'readaccess')): operation = 'ReadAccess' elif operation in self.writeMethods or (operation.lower() in ('write', 'writeaccess')): operation = 'WriteAccess' elif operation in self.removeMethods or (operation.lower() in ('remove', 'removeaccess')): operation = 'RemoveAccess' elif operation in self.checkMethods or (operation.lower() in ('check', 'checkaccess')): operation = 'CheckAccess' else: log.debug("The supplied operation is not known.", operation) return S_ERROR(DErrno.ENOMETH, "SE.isValid: The supplied operation is not known.") log.debug("check the operation: %s " % operation) # Check if the operation is valid if operation == 'CheckAccess': if not reading: if not checking: log.debug("Check access not currently permitted.") return S_ERROR(errno.EACCES, "SE.isValid: Check access not currently permitted.") if operation == 'ReadAccess': if not reading: log.debug("Read access not currently permitted.") return S_ERROR(errno.EACCES, "SE.isValid: Read access not currently permitted.") if operation == 'WriteAccess': if not writing: log.debug("Write access not currently permitted.") return S_ERROR(errno.EACCES, "SE.isValid: Write access not currently permitted.") if operation == 'RemoveAccess': if not removing: log.debug("Remove access not currently permitted.") return S_ERROR(errno.EACCES, "SE.isValid: Remove access not currently permitted.") return S_OK() def getPlugins(self): """ Get the list of all the plugins defined for this Storage Element """ self.log.getSubLogger('getPlugins').verbose("Obtaining all plugins of %s." % self.name) if not self.valid: return S_ERROR(self.errorReason) allPlugins = self.localPlugins + self.remotePlugins return S_OK(allPlugins) def getRemotePlugins(self): """ Get the list of all the remote access protocols defined for this Storage Element """ self.log.getSubLogger('getRemotePlugins').verbose( "Obtaining remote protocols for %s." 
% self.name) if not self.valid: return S_ERROR(self.errorReason) return S_OK(self.remotePlugins) def getLocalPlugins(self): """ Get the list of all the local access protocols defined for this Storage Element """ self.log.getSubLogger('getLocalPlugins').verbose( "Obtaining local protocols for %s." % self.name) if not self.valid: return S_ERROR(self.errorReason) return S_OK(self.localPlugins) def getStorageParameters(self, plugin=None, protocol=None): """ Get plugin specific options :param plugin : plugin we are interested in :param protocol: protocol we are interested in Either plugin or protocol can be defined, not both, but at least one of them """ # both set if plugin and protocol: return S_ERROR(errno.EINVAL, "plugin and protocol cannot be set together.") # both None elif not (plugin or protocol): return S_ERROR(errno.EINVAL, "plugin and protocol cannot be None together.") log = self.log.getSubLogger('getStorageParameters') reqStr = "plugin %s" % plugin if plugin else "protocol %s" % protocol log.verbose("Obtaining storage parameters for %s for %s." % (self.name, reqStr)) for storage in self.storages: storageParameters = storage.getParameters() if plugin and storageParameters['PluginName'] == plugin: return S_OK(storageParameters) elif protocol and storageParameters['Protocol'] == protocol: return S_OK(storageParameters) errStr = "Requested plugin or protocol not available." 
log.debug(errStr, "%s for %s" % (reqStr, self.name)) return S_ERROR(errno.ENOPROTOOPT, errStr) def __getAllProtocols(self, protoType): """ Returns the list of all protocols for Input or Output :param proto = InputProtocols or OutputProtocols """ return set(reduce(lambda x, y: x + y, [plugin.protocolParameters[protoType] for plugin in self.storages])) def _getAllInputProtocols(self): """ Returns all the protocols supported by the SE for Input """ return self.__getAllProtocols('InputProtocols') def _getAllOutputProtocols(self): """ Returns all the protocols supported by the SE for Output """ return self.__getAllProtocols('OutputProtocols') def generateTransferURLsBetweenSEs(self, lfns, sourceSE, protocols=None): """ This negociate the URLs to be used for third party copy. This is mostly useful for FTS. If protocols is given, it restricts the list of plugins to use :param lfns: list/dict of lfns to generate the URLs :param sourceSE: storageElement instance of the sourceSE :param protocols: ordered protocol restriction list :return:dictionnary Successful/Failed with pair (src, dest) urls """ log = self.log.getSubLogger('generateTransferURLsBetweenSEs') result = checkArgumentFormat(lfns) if result['OK']: lfns = result['Value'] else: errStr = "Supplied urls must be string, list of strings or a dictionary." 
log.debug(errStr) return S_ERROR(errno.EINVAL, errStr) # First, find common protocols to use res = self.negociateProtocolWithOtherSE(sourceSE, protocols=protocols) if not res['OK']: return res commonProtocols = res['Value'] # Taking each protocol at the time, we try to generate src and dest URLs for proto in commonProtocols: srcPlugin = None destPlugin = None log.debug("Trying to find plugins for protocol %s" % proto) # Finding the source storage plugin for storagePlugin in sourceSE.storages: log.debug("Testing %s as source plugin" % storagePlugin.pluginName) storageParameters = storagePlugin.getParameters() nativeProtocol = storageParameters['Protocol'] # If the native protocol of the plugin is allowed for read if nativeProtocol in sourceSE.localAccessProtocolList: # If the plugin can generate the protocol we are interested in if proto in storageParameters['OutputProtocols']: log.debug("Selecting it") srcPlugin = storagePlugin break # If we did not find a source plugin, continue if srcPlugin is None: log.debug("Could not find a source plugin for protocol %s" % proto) continue # Finding the destination storage plugin for storagePlugin in self.storages: log.debug("Testing %s as destination plugin" % storagePlugin.pluginName) storageParameters = storagePlugin.getParameters() nativeProtocol = storageParameters['Protocol'] # If the native protocol of the plugin is allowed for write if nativeProtocol in self.localWriteProtocolList: # If the plugin can accept the protocol we are interested in if proto in storageParameters['InputProtocols']: log.debug("Selecting it") destPlugin = storagePlugin break # If we found both a source and destination plugin, we are happy, # otherwise we continue with the next protocol if destPlugin is None: log.debug("Could not find a destination plugin for protocol %s" % proto) srcPlugin = None continue failed = {} successful = {} # Generate the URLs for lfn in lfns: # Source URL first res = srcPlugin.constructURLFromLFN(lfn, withWSUrl=True) if 
not res['OK']: errMsg = "Error generating source url: %s" % res['Message'] gLogger.debug("Error generating source url", errMsg) failed[lfn] = errMsg continue srcURL = res['Value'] # Destination URL res = destPlugin.constructURLFromLFN(lfn, withWSUrl=True) if not res['OK']: errMsg = "Error generating destination url: %s" % res['Message'] gLogger.debug("Error generating destination url", errMsg) failed[lfn] = errMsg continue destURL = res['Value'] successful[lfn] = (srcURL, destURL) return S_OK({'Successful': successful, 'Failed': failed}) return S_ERROR(errno.ENOPROTOOPT, "Could not find a protocol ") def negociateProtocolWithOtherSE(self, sourceSE, protocols=None): """ Negotiate what protocol could be used for a third party transfer between the sourceSE and ourselves. If protocols is given, the chosen protocol has to be among those :param sourceSE : storageElement instance of the sourceSE :param protocols: ordered protocol restriction list :return: a list protocols that fits the needs, or None """ # No common protocols if this is a proxy storage if self.useProxy: return S_OK([]) log = self.log.getSubLogger('negociateProtocolWithOtherSE', child=True) log.debug( "Negociating protocols between %s and %s (protocols %s)" % (sourceSE.name, self.name, protocols)) # Take all the protocols the destination can accept as input destProtocols = self._getAllInputProtocols() log.debug("Destination input protocols %s" % destProtocols) # Take all the protocols the source can provide sourceProtocols = sourceSE._getAllOutputProtocols() log.debug("Source output protocols %s" % sourceProtocols) commonProtocols = destProtocols & sourceProtocols # If a restriction list is defined # take the intersection, and sort the commonProtocols # based on the protocolList order if protocols: protocolList = list(protocols) commonProtocols = sorted( commonProtocols & set(protocolList), key=lambda x: self.__getIndexInList( x, protocolList)) log.debug("Common protocols %s" % commonProtocols) return 
S_OK(list(commonProtocols)) ################################################################################################# # # These are the basic get functions for lfn manipulation # def __getURLPath(self, url): """ Get the part of the URL path below the basic storage path. This path must coincide with the LFN of the file in order to be compliant with the DIRAC conventions. """ log = self.log.getSubLogger('__getURLPath') log.verbose("Getting path from url in %s." % self.name) if not self.valid: return S_ERROR(self.errorReason) res = pfnparse(url) if not res['OK']: return res fullURLPath = '%s/%s' % (res['Value']['Path'], res['Value']['FileName']) # Check all available storages and check whether the url is for that protocol urlPath = '' for storage in self.storages: res = storage.isNativeURL(url) if res['OK']: if res['Value']: parameters = storage.getParameters() saPath = parameters['Path'] if not saPath: # If the sa path doesn't exist then the url path is the entire string urlPath = fullURLPath else: if re.search(saPath, fullURLPath): # Remove the sa path from the fullURLPath urlPath = fullURLPath.replace(saPath, '') if urlPath: return S_OK(urlPath) # This should never happen. DANGER!! errStr = "Failed to get the url path for any of the protocols!!" log.debug(errStr) return S_ERROR(errStr) def getLFNFromURL(self, urls): """ Get the LFN from the PFNS . :param lfn : input lfn or lfns (list/dict) """ result = checkArgumentFormat(urls) if result['OK']: urlDict = result['Value'] else: errStr = "Supplied urls must be string, list of strings or a dictionary." 
self.log.getSubLogger('getLFNFromURL').debug(errStr) return S_ERROR(errno.EINVAL, errStr) retDict = {"Successful": {}, "Failed": {}} for url in urlDict: res = self.__getURLPath(url) if res["OK"]: retDict["Successful"][url] = res["Value"] else: retDict["Failed"][url] = res["Message"] return S_OK(retDict) ########################################################################################### # # This is the generic wrapper for file operations # def getURL(self, lfn, protocol=False, replicaDict=None): """ execute 'getTransportURL' operation. :param str lfn: string, list or dictionary of lfns :param protocol: if no protocol is specified, we will request self.turlProtocols :param replicaDict: optional results from the File Catalog replica query """ self.log.getSubLogger('getURL').verbose("Getting accessUrl %s for lfn in %s." % ("(%s)" % protocol if protocol else "", self.name)) if not protocol: # This turlProtocols seems totally useless. # Get ride of it when gfal2 is totally ready # and replace it with the localAccessProtocol list protocols = self.turlProtocols elif isinstance(protocol, list): protocols = protocol elif isinstance(protocol, basestring): protocols = [protocol] self.methodName = "getTransportURL" result = self.__executeMethod(lfn, protocols=protocols) return result def __isLocalSE(self): """ Test if the Storage Element is local in the current context """ self.log.getSubLogger('LocalSE').verbose("Determining whether %s is a local SE." % self.name) import DIRAC localSEs = getSEsForSite(DIRAC.siteName())['Value'] if self.name in localSEs: return S_OK(True) else: return S_OK(False) def __getFileCatalog(self): if not self.__fileCatalog: self.__fileCatalog = FileCatalog(vo=self.vo) return self.__fileCatalog def __generateURLDict(self, lfns, storage, replicaDict=None): """ Generates a dictionary (url : lfn ), where the url are constructed from the lfn using the constructURLFromLFN method of the storage plugins. 
:param: lfns : dictionary {lfn:whatever} :returns dictionary {constructed url : lfn} """ log = self.log.getSubLogger("__generateURLDict") log.verbose("generating url dict for %s lfn in %s." % (len(lfns), self.name)) if not replicaDict: replicaDict = {} urlDict = {} # url : lfn failed = {} # lfn : string with errors for lfn in lfns: if self.useCatalogURL: # Is this self.name alias proof? url = replicaDict.get(lfn, {}).get(self.name, '') if url: urlDict[url] = lfn continue else: fc = self.__getFileCatalog() result = fc.getReplicas() if not result['OK']: failed[lfn] = result['Message'] url = result['Value']['Successful'].get(lfn, {}).get(self.name, '') if not url: failed[lfn] = 'Failed to get catalog replica' else: # Update the URL according to the current SE description result = returnSingleResult(storage.updateURL(url)) if not result['OK']: failed[lfn] = result['Message'] else: urlDict[result['Value']] = lfn else: result = storage.constructURLFromLFN(lfn, withWSUrl=True) if not result['OK']: errStr = result['Message'] log.debug(errStr, 'for %s' % (lfn)) failed[lfn] = "%s %s" % (failed[lfn], errStr) if lfn in failed else errStr else: urlDict[result['Value']] = lfn res = S_OK({'Successful': urlDict, 'Failed': failed}) # res['Failed'] = failed return res @staticmethod def __getIndexInList(x, l): """ Return the index of the element x in the list l or sys.maxint if it does not exist :param x: element to look for :param l: list to look int :return: the index or sys.maxint """ try: return l.index(x) except ValueError: return sys.maxsize def __filterPlugins(self, methodName, protocols=None, inputProtocol=None): """ Determine the list of plugins that can be used for a particular action Args: method(str): method to execute protocols(list): specific protocols might be requested inputProtocol(str): in case the method is putFile, this specifies the protocol given as source Returns: list: list of storage plugins """ log = self.log.getSubLogger('__filterPlugins', child=True) 
log.debug( "Filtering plugins for %s (protocol = %s ; inputProtocol = %s)" % (methodName, protocols, inputProtocol)) if isinstance(protocols, basestring): protocols = [protocols] pluginsToUse = [] potentialProtocols = [] allowedProtocols = [] if methodName in self.readMethods + self.checkMethods: allowedProtocols = self.localAccessProtocolList elif methodName in self.removeMethods + self.writeMethods: allowedProtocols = self.localWriteProtocolList else: # OK methods # If a protocol or protocol list is specified, we only use the plugins that # can generate such protocol # otherwise we return them all if protocols: setProtocol = set(protocols) for plugin in self.storages: if set(plugin.protocolParameters.get("OutputProtocols", [])) & setProtocol: log.debug("Plugin %s can generate compatible protocol" % plugin.pluginName) pluginsToUse.append(plugin) else: pluginsToUse = self.storages # The closest list for "OK" methods is the AccessProtocol preference, so we sort based on that pluginsToUse.sort( key=lambda x: self.__getIndexInList( x.protocolParameters['Protocol'], self.localAccessProtocolList)) log.debug("Plugins to be used for %s: %s" % (methodName, [p.pluginName for p in pluginsToUse])) return pluginsToUse log.debug("Allowed protocol: %s" % allowedProtocols) # if a list of protocol is specified, take it into account if protocols: potentialProtocols = list(set(allowedProtocols) & set(protocols)) else: potentialProtocols = allowedProtocols log.debug('Potential protocols %s' % potentialProtocols) localSE = self.__isLocalSE()['Value'] for plugin in self.storages: # Determine whether to use this storage object pluginParameters = plugin.getParameters() pluginName = pluginParameters.get('PluginName') if not pluginParameters: log.debug("Failed to get storage parameters.", "%s %s" % (self.name, pluginName)) continue if not (pluginName in self.remotePlugins) and not localSE and not pluginName == "Proxy": # If the SE is not local then we can't use local protocols 
log.debug("Local protocol not appropriate for remote use: %s." % pluginName) continue if pluginParameters['Protocol'] not in potentialProtocols: log.debug("Plugin %s not allowed for %s." % (pluginName, methodName)) continue # If we are attempting a putFile and we know the inputProtocol if methodName == 'putFile' and inputProtocol: if inputProtocol not in pluginParameters['InputProtocols']: log.debug( "Plugin %s not appropriate for %s protocol as input." % (pluginName, inputProtocol)) continue pluginsToUse.append(plugin) # sort the plugins according to the lists in the CS pluginsToUse.sort( key=lambda x: self.__getIndexInList( x.protocolParameters['Protocol'], allowedProtocols)) log.debug("Plugins to be used for %s: %s" % (methodName, [p.pluginName for p in pluginsToUse])) return pluginsToUse def __executeMethod(self, lfn, *args, **kwargs): """ Forward the call to each storage in turn until one works. The method to be executed is stored in self.methodName :param lfn : string, list or dictionary :param *args : variable amount of non-keyword arguments. SHOULD BE EMPTY :param **kwargs : keyword arguments :returns S_OK( { 'Failed': {lfn : reason} , 'Successful': {lfn : value} } ) The Failed dict contains the lfn only if the operation failed on all the storages The Successful dict contains the value returned by the successful storages. A special kwargs is 'inputProtocol', which can be specified for putFile. It describes the protocol used as source protocol, since there is in principle only one. """ removedArgs = {} log = self.log.getSubLogger('__executeMethod') log.verbose("preparing the execution of %s" % (self.methodName)) # args should normaly be empty to avoid problem... 
if args: log.verbose("args should be empty!%s" % args) # because there is normally only one kw argument, I can move it from args to kwargs methDefaultArgs = StorageElementItem.__defaultsArguments.get(self.methodName, {}).keys() if methDefaultArgs: kwargs[methDefaultArgs[0]] = args[0] args = args[1:] log.verbose( "put it in kwargs, but dirty and might be dangerous!args %s kwargs %s" % (args, kwargs)) # We check the deprecated arguments for depArg in StorageElementItem.__deprecatedArguments: if depArg in kwargs: log.verbose("%s is not an allowed argument anymore. Please change your code!" % depArg) removedArgs[depArg] = kwargs[depArg] del kwargs[depArg] # Set default argument if any methDefaultArgs = StorageElementItem.__defaultsArguments.get(self.methodName, {}) for argName in methDefaultArgs: if argName not in kwargs: log.debug("default argument %s for %s not present.\ Setting value %s." % (argName, self.methodName, methDefaultArgs[argName])) kwargs[argName] = methDefaultArgs[argName] res = checkArgumentFormat(lfn) if not res['OK']: errStr = "Supplied lfns must be string, list of strings or a dictionary." log.debug(errStr) return res lfnDict = res['Value'] log.verbose( "Attempting to perform '%s' operation with %s lfns." % (self.methodName, len(lfnDict))) res = self.isValid(operation=self.methodName) if not res['OK']: return res else: if not self.valid: return S_ERROR(self.errorReason) # In case executing putFile, we can assume that all the source urls # are from the same protocol. 
This optional parameter, if defined # can be used to ignore some storage plugins and thus save time # and avoid fake failures showing in the accounting inputProtocol = kwargs.pop('inputProtocol', None) successful = {} failed = {} filteredPlugins = self.__filterPlugins(self.methodName, kwargs.get('protocols'), inputProtocol) if not filteredPlugins: return S_ERROR(errno.EPROTONOSUPPORT, "No storage plugins matching the requirements\ (operation %s protocols %s inputProtocol %s)" % (self.methodName, kwargs.get('protocols'), inputProtocol)) # Try all of the storages one by one for storage in filteredPlugins: # Determine whether to use this storage object storageParameters = storage.getParameters() pluginName = storageParameters['PluginName'] if not lfnDict: log.debug("No lfns to be attempted for %s protocol." % pluginName) continue log.verbose("Generating %s protocol URLs for %s." % (len(lfnDict), pluginName)) replicaDict = kwargs.pop('replicaDict', {}) if storage.pluginName != "Proxy": res = self.__generateURLDict(lfnDict, storage, replicaDict=replicaDict) urlDict = res['Value']['Successful'] # url : lfn failed.update(res['Value']['Failed']) else: urlDict = dict([(lfn, lfn) for lfn in lfnDict]) if not urlDict: log.verbose("__executeMethod No urls generated for protocol %s." 
% pluginName) else: log.verbose( "Attempting to perform '%s' for %s physical files" % (self.methodName, len(urlDict))) fcn = None if hasattr(storage, self.methodName) and callable(getattr(storage, self.methodName)): fcn = getattr(storage, self.methodName) if not fcn: return S_ERROR( DErrno.ENOMETH, "SE.__executeMethod: unable to invoke %s, it isn't a member function of storage") urlsToUse = {} # url : the value of the lfn dictionary for the lfn of this url for url in urlDict: urlsToUse[url] = lfnDict[urlDict[url]] startDate = datetime.datetime.utcnow() startTime = time.time() res = fcn(urlsToUse, *args, **kwargs) elapsedTime = time.time() - startTime self.addAccountingOperation(urlsToUse, startDate, elapsedTime, storageParameters, res) if not res['OK']: errStr = "Completely failed to perform %s." % self.methodName log.debug(errStr, 'with plugin %s: %s' % (pluginName, res['Message'])) for lfn in urlDict.values(): if lfn not in failed: failed[lfn] = '' failed[lfn] = "%s %s" % (failed[lfn], res['Message']) if failed[lfn] else res['Message'] else: for url, lfn in urlDict.items(): if url not in res['Value']['Successful']: if lfn not in failed: failed[lfn] = '' if url in res['Value']['Failed']: self.log.debug(res['Value']['Failed'][url]) failed[lfn] = "%s %s" % (failed[lfn], res['Value']['Failed'][url] ) if failed[lfn] else res['Value']['Failed'][url] else: errStr = 'No error returned from plug-in' failed[lfn] = "%s %s" % (failed[lfn], errStr) if failed[lfn] else errStr else: successful[lfn] = res['Value']['Successful'][url] if lfn in failed: failed.pop(lfn) lfnDict.pop(lfn) gDataStoreClient.commit() return S_OK({'Failed': failed, 'Successful': successful}) def __getattr__(self, name): """ Forwards the equivalent Storage calls to __executeMethod""" # We take either the equivalent name, or the name itself self.methodName = StorageElementItem.__equivalentMethodNames.get(name, None) if self.methodName: return self.__executeMethod raise AttributeError("StorageElement does 
not have a method '%s'" % name) def addAccountingOperation(self, lfns, startDate, elapsedTime, storageParameters, callRes): """ Generates a DataOperation accounting if needs to be, and adds it to the DataStore client cache :param lfns : list of lfns on which we attempted the operation :param startDate : datetime, start of the operation :param elapsedTime : time (seconds) the operation took :param storageParameters : the parameters of the plugins used to perform the operation :param callRes : the return of the method call, S_OK or S_ERROR The operation is generated with the OperationType "se.methodName" The TransferSize and TransferTotal for directory methods actually take into account the files inside the directory, and not the amount of directory given as parameter """ if self.methodName not in (self.readMethods + self.writeMethods + self.removeMethods): return baseAccountingDict = {} baseAccountingDict['OperationType'] = 'se.%s' % self.methodName baseAccountingDict['User'] = getProxyInfo().get('Value', {}).get('username', 'unknown') baseAccountingDict['RegistrationTime'] = 0.0 baseAccountingDict['RegistrationOK'] = 0 baseAccountingDict['RegistrationTotal'] = 0 # if it is a get method, then source and destination of the transfer should be inverted if self.methodName == 'getFile': baseAccountingDict['Destination'] = siteName() baseAccountingDict['Source'] = self.name else: baseAccountingDict['Destination'] = self.name baseAccountingDict['Source'] = siteName() baseAccountingDict['TransferTotal'] = 0 baseAccountingDict['TransferOK'] = 0 baseAccountingDict['TransferSize'] = 0 baseAccountingDict['TransferTime'] = 0.0 baseAccountingDict['FinalStatus'] = 'Successful' oDataOperation = DataOperation() oDataOperation.setValuesFromDict(baseAccountingDict) oDataOperation.setStartTime(startDate) oDataOperation.setEndTime(startDate + datetime.timedelta(seconds=elapsedTime)) oDataOperation.setValueByKey('TransferTime', elapsedTime) oDataOperation.setValueByKey('Protocol', 
storageParameters.get('Protocol', 'unknown')) if not callRes['OK']: # Everything failed oDataOperation.setValueByKey('TransferTotal', len(lfns)) oDataOperation.setValueByKey('FinalStatus', 'Failed') else: succ = callRes.get('Value', {}).get('Successful', {}) failed = callRes.get('Value', {}).get('Failed', {}) totalSize = 0 # We don't take len(lfns) in order to make two # separate entries in case of few failures totalSucc = len(succ) if self.methodName in ('putFile', 'getFile'): # putFile and getFile return for each entry # in the successful dir the size of the corresponding file totalSize = sum(succ.values()) elif self.methodName in ('putDirectory', 'getDirectory'): # putDirectory and getDirectory return for each dir name # a dictionnary with the keys 'Files' and 'Size' totalSize = sum(val.get('Size', 0) for val in succ.values() if isinstance(val, dict)) totalSucc = sum(val.get('Files', 0) for val in succ.values() if isinstance(val, dict)) oDataOperation.setValueByKey('TransferOK', len(succ)) oDataOperation.setValueByKey('TransferSize', totalSize) oDataOperation.setValueByKey('TransferTotal', totalSucc) oDataOperation.setValueByKey('TransferOK', totalSucc) if callRes['Value']['Failed']: oDataOperationFailed = copy.deepcopy(oDataOperation) oDataOperationFailed.setValueByKey('TransferTotal', len(failed)) oDataOperationFailed.setValueByKey('TransferOK', 0) oDataOperationFailed.setValueByKey('TransferSize', 0) oDataOperationFailed.setValueByKey('FinalStatus', 'Failed') accRes = gDataStoreClient.addRegister(oDataOperationFailed) if not accRes['OK']: self.log.error("Could not send failed accounting report", accRes['Message']) accRes = gDataStoreClient.addRegister(oDataOperation) if not accRes['OK']: self.log.error("Could not send accounting report", accRes['Message'])
def __init__(self, name, plugins=None, vo=None, hideExceptions=False):
    """ Build a storage element from its configuration.

        :param str name: SE name
        :param list plugins: requested storage plugins (None means no restriction is
                             passed to the StorageFactory, i.e. an empty plugin list)
        :param vo: vo; when not given it is taken from the proxy group
        :param bool hideExceptions: forwarded as is to StorageFactory.getStorages
    """
    self.methodName = None

    resolvedVO = vo
    if not resolvedVO:
        voResult = getVOfromProxyGroup()
        if not voResult['OK']:
            # NOTE(review): this leaves the object only partially initialised
            # (no 'valid', 'name' or 'log' attribute) - confirm this is intended.
            return
        resolvedVO = voResult['Value']
    self.vo = resolvedVO
    self.opHelper = Operations(vo=self.vo)

    # These things will soon have to go as well. 'AccessProtocol.1' is all but flexible.
    proxiedProtocols = gConfig.getValue('/LocalSite/StorageElements/ProxyProtocols', "").split(',')
    firstAccessProtocol = gConfig.getValue(
        "/Resources/StorageElements/%s/AccessProtocol.1/Protocol" % name,
        "UnknownProtocol")
    # The first truthy answer wins: proxied protocol, then local site config, then Operations
    self.useProxy = (
        (firstAccessProtocol in proxiedProtocols)
        or gConfig.getValue('/LocalSite/StorageElements/%s/UseProxy' % name, False)
        or self.opHelper.getValue('/Services/StorageElements/%s/UseProxy' % name, False)
    )

    self.valid = True
    requestedPlugins = [] if plugins is None else plugins
    storageFactory = StorageFactory(useProxy=self.useProxy, vo=self.vo)
    res = storageFactory.getStorages(name, pluginList=requestedPlugins, hideExceptions=hideExceptions)

    if res['OK']:
        factoryDict = res['Value']
        self.name = factoryDict['StorageName']
        self.options = factoryDict['StorageOptions']
        self.localPlugins = factoryDict['LocalPlugins']
        self.remotePlugins = factoryDict['RemotePlugins']
        self.storages = factoryDict['StorageObjects']
        self.protocolOptions = factoryDict['ProtocolOptions']
        self.turlProtocols = factoryDict['TurlProtocols']

        # Each plugin gets a back reference to this storage element
        for storagePlugin in self.storages:
            storagePlugin.setStorageElement(self)
    else:
        # Remember why the SE is unusable; the factory-provided attributes stay unset
        self.valid = False
        self.name = name
        self.errorReason = res['Message']

    self.log = gLogger.getSubLogger("SE[%s]" % self.name)

    if self.valid:
        self.useCatalogURL = gConfig.getValue(
            '/Resources/StorageElements/%s/UseCatalogURL' % self.name, False)
        self.log.debug("useCatalogURL: %s" % self.useCatalogURL)

        self.__dmsHelper = DMSHelpers(vo=vo)

        # Allow SE to overwrite general operation config
        accessProto = self.options.get('AccessProtocols')
        if accessProto:
            self.localAccessProtocolList = accessProto
        else:
            self.localAccessProtocolList = self.__dmsHelper.getAccessProtocols()
        self.log.debug("localAccessProtocolList %s" % self.localAccessProtocolList)

        writeProto = self.options.get('WriteProtocols')
        if writeProto:
            self.localWriteProtocolList = writeProto
        else:
            self.localWriteProtocolList = self.__dmsHelper.getWriteProtocols()
        self.log.debug("localWriteProtocolList %s" % self.localWriteProtocolList)

    # Method families ('getTransportURL' is deliberately listed among the OK methods only)
    self.readMethods = ['getFile',
                        'prestageFile',
                        'prestageFileStatus',
                        'getDirectory']

    self.writeMethods = ['retransferOnlineFile',
                         'putFile',
                         'replicateFile',
                         'pinFile',
                         'releaseFile',
                         'createDirectory',
                         'putDirectory']

    self.removeMethods = ['removeFile',
                          'removeDirectory']

    self.checkMethods = ['exists',
                         'getDirectoryMetadata',
                         'getDirectorySize',
                         'getFileSize',
                         'getFileMetadata',
                         'listDirectory',
                         'isDirectory',
                         'isFile',
                         'getOccupancy'
                         ]

    self.okMethods = ['getLocalProtocols',
                      'getProtocols',
                      'getRemoteProtocols',
                      'storageElementName',
                      'getStorageParameters',
                      'getTransportURL',
                      'isLocalSE']

    self.__fileCatalog = None
class PluginUtilities(object):
    """
    Utility class used by plugins: logging helpers prefixed with the transformation
    string, grouping of files into tasks (by replica location or by size), cached
    file-size lookup, plugin parameter resolution and SE comparison helpers.
    """

    def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
                 debug=False, transInThread=None, transID=None):
        """
        c'tor

        Setting defaults

        :param str plugin: plugin name, also used as sub-logger name and CS section
        :param transClient: TransformationClient to use (a new one is created if None)
        :param dataManager: DataManager to use (a new one is created if None)
        :param fc: FileCatalog to use (a new one is created if None)
        :param bool debug: if True, verbose messages are logged at info level
        :param dict transInThread: mapping transID -> log prefix (a new dict if None)
        :param transID: transformation ID
        """
        # clients
        self.transClient = TransformationClient() if transClient is None else transClient
        self.dm = DataManager() if dataManager is None else dataManager
        self.fc = FileCatalog() if fc is None else fc

        self.dmsHelper = DMSHelpers()

        self.plugin = plugin
        self.transID = transID
        self.params = {}
        self.groupSize = 0
        self.maxFiles = 0
        self.cachedLFNSize = {}
        self.transString = ''
        self.debug = debug
        self.transInThread = {} if transInThread is None else transInThread

        self.log = gLogger.getSubLogger(plugin)

    def logVerbose(self, message, param=''):
        """ logger helper: verbose level, or info level flagged with '(V)' in debug mode """
        if self.debug:
            self.log.info('(V)' + self.transString + message, param)
        else:
            self.log.verbose(self.transString + message, param)

    def logDebug(self, message, param=''):
        """ logger helper """
        self.log.debug(self.transString + message, param)

    def logInfo(self, message, param=''):
        """ logger helper """
        self.log.info(self.transString + message, param)

    def logWarn(self, message, param=''):
        """ logger helper """
        self.log.warn(self.transString + message, param)

    def logError(self, message, param=''):
        """ logger helper """
        self.log.error(self.transString + message, param)

    def logException(self, message, param='', lException=False):
        """ logger helper """
        self.log.exception(self.transString + message, param, lException)

    def setParameters(self, params):
        """ Set the transformation parameters and extract transID

            :param dict params: transformation parameters, must contain 'TransformationID'
        """
        self.params = params
        self.transID = params['TransformationID']
        self.transString = self.transInThread.get(self.transID, ' [NoThread] [%d] ' % self.transID)

    # @timeThis
    def groupByReplicas(self, files, status):
        """
        Generates tasks based on the location of the input data

        :param dict files: lfn -> list of SEs holding a replica, e.g.
                    {'/this/is/at.1': ['SE1'],
                     '/this/is/at.12': ['SE1', 'SE2'],
                     '/this/is/at.2': ['SE2'],
                     '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
                     '/this/is/at_23': ['SE2', 'SE3'],
                     '/this/is/at_4': ['SE4']}
        :param str status: transformation status; 'Flush' forces incomplete groups into tasks
        :returns: S_OK( list of (replicaSE, list of lfns) )
        """
        tasks = []
        nTasks = 0

        if not files:
            return S_OK(tasks)

        # Work on a copy, files are removed from it as they get assigned to tasks
        files = dict(files)

        # Parameters
        if not self.groupSize:
            self.groupSize = self.getPluginParam('GroupSize', 10)
        flush = (status == 'Flush')
        self.logVerbose(
            "groupByReplicas: %d files, groupSize %d, flush %s" %
            (len(files), self.groupSize, flush))

        # Consider files by groups of SEs, a file is only in one group
        # Then consider files site by site, but a file can now be at more than one site
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            self.logDebug("fileGroups set: ", seFiles)

            for replicaSE in sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                if not lfns:
                    continue
                lfnsInTasks = []
                for taskLfns in breakListIntoChunks(lfns, self.groupSize):
                    if flush or (len(taskLfns) >= self.groupSize):
                        tasks.append((replicaSE, taskLfns))
                        lfnsInTasks += taskLfns
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)
                if not groupSE:
                    # In case the file was at more than one site, remove it from the other sites' list
                    assigned = set(lfnsInTasks)
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in assigned]

            self.logVerbose(
                "groupByReplicas: %d tasks created (groupSE %s)" %
                (len(tasks) - nTasks, str(groupSE)), "%d files not included in tasks" %
                len(files))
            nTasks = len(tasks)

        return S_OK(tasks)

    def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
        """
        Split files in groups according to the size and create tasks for a given SE

        :param lfns: iterable of lfns to distribute
        :param replicaSE: SE (or SE group) the tasks are created for
        :param dict fileSizes: lfn -> size in bytes; fetched with _getFileSize if None
        :param bool flush: if True, the last incomplete group also becomes a task
        :returns: list of (replicaSE, list of lfns); empty if the sizes cannot be obtained.
                  Files with unknown or zero size are never put in a task.
        """
        tasks = []
        if fileSizes is None:
            fileSizes = self._getFileSize(lfns).get('Value')
        if fileSizes is None:
            self.logWarn('Error getting file sizes, no tasks created')
            return tasks
        taskLfns = []
        taskSize = 0
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
        if not self.maxFiles:
            # FIXME: prepare for changing the name of the ambiguous CS option
            self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
        # Unknown sizes sort as 0: comparing None with int raises TypeError in Python 3
        lfns = sorted(lfns, key=lambda lfn: fileSizes.get(lfn) or 0)
        for lfn in lfns:
            size = fileSizes.get(lfn, 0)
            if not size:
                continue
            if size > self.groupSize:
                # A file bigger than the group size makes a task on its own
                tasks.append((replicaSE, [lfn]))
                continue
            taskSize += size
            taskLfns.append(lfn)
            if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
                tasks.append((replicaSE, taskLfns))
                taskLfns = []
                taskSize = 0
        if flush and taskLfns:
            tasks.append((replicaSE, taskLfns))
        if not tasks and not flush and taskLfns:
            self.logVerbose(
                'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
                (taskSize, self.groupSize))
        return tasks

    # @timeThis
    def groupBySize(self, files, status):
        """
        Generate a task for a given amount of data

        :param dict files: lfn -> list of SEs holding a replica
        :param str status: transformation status; 'Flush' forces incomplete groups into tasks
        :returns: S_OK( list of (replicaSE, list of lfns) ), or the S_ERROR of the size lookup
        """
        tasks = []
        nTasks = 0

        if not files:
            return S_OK(tasks)

        # Work on a copy, files are removed from it as they get assigned to tasks
        files = dict(files)
        nFiles = len(files)
        # Parameters
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
        flush = (status == 'Flush')
        self.logVerbose(
            "groupBySize: %d files, groupSize: %d, flush: %s" %
            (len(files), self.groupSize, flush))

        # Get the file sizes
        res = self._getFileSize(list(files))
        if not res['OK']:
            return res
        fileSizes = res['Value']

        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)

            for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
                lfnsInTasks = []
                for task in newTasks:
                    lfnsInTasks += task[1]
                tasks += newTasks

                # Remove the selected files from the size cache
                self.clearCachedFileSize(lfnsInTasks)
                if not groupSE:
                    # Remove files from other SEs
                    assigned = set(lfnsInTasks)
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in assigned]
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)

            self.logVerbose(
                "groupBySize: %d tasks created with groupSE %s" %
                (len(tasks) - nTasks, str(groupSE)))
            self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
            nTasks = len(tasks)

        # 'files' now only holds what is left over: report what was actually grouped
        self.logVerbose("Grouped %d files by size" % (nFiles - len(files)))
        return S_OK(tasks)

    def getExistingCounters(self, normalise=False, requestedSites=None):
        """
        Get the number of files of the current transformation per UsedSE

        :param bool normalise: if True, counts are converted to percentages
        :param list requestedSites: if not empty, counts are given per site (restricted
                                    to these sites) rather than per SE
        :returns: S_OK( {SE or site: count or share} ), or the S_ERROR of a failed lookup
        """
        res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                           {'TransformationID': self.params['TransformationID']})
        if not res['OK']:
            return res
        usageDict = {}
        for usedDict, count in res['Value']:
            usedSE = usedDict['UsedSE']
            if usedSE != 'Unknown':
                usageDict[usedSE] = count
        if requestedSites:
            siteDict = {}
            for se, count in usageDict.items():
                res = getSitesForSE(se)
                if not res['OK']:
                    return res
                for site in res['Value']:
                    if site in requestedSites:
                        siteDict[site] = count
            usageDict = siteDict.copy()
        if normalise:
            usageDict = self._normaliseShares(usageDict)
        return S_OK(usageDict)

    # @timeThis
    def _getFileSize(self, lfns):
        """
        Get file size from a cache, if not from the catalog

        :param lfns: iterable of lfns
        :returns: S_OK( {lfn: size} ) for the files whose size is known,
                  or the S_ERROR of the catalog lookup
        """
        lfns = list(lfns)
        cachedLFNSize = dict(self.cachedLFNSize)

        fileSizes = dict((lfn, cachedLFNSize[lfn]) for lfn in lfns if lfn in cachedLFNSize)
        self.logDebug(
            "Found cache hit for File size for %d files out of %d" %
            (len(fileSizes), len(lfns)))
        lfns = [lfn for lfn in lfns if lfn not in cachedLFNSize]
        if lfns:
            # The catalog lookup also fills self.cachedLFNSize
            res = self._getFileSizeFromCatalog(lfns, fileSizes)
            if not res['OK']:
                self.logError(res['Message'])
                return res
            fileSizes = res['Value']
        return S_OK(fileSizes)

    # @timeThis
    def _getFileSizeFromCatalog(self, lfns, fileSizes):
        """
        Get file size from the catalog

        :param lfns: iterable of lfns to look up
        :param dict fileSizes: already known sizes; it is copied, not modified
        :returns: S_OK( fileSizes completed with the sizes found ), S_ERROR if the
                  catalog call fails. The sizes found are also stored in the cache.
        """
        lfns = list(lfns)
        fileSizes = dict(fileSizes)

        res = self.fc.getFileSize(lfns)
        if not res['OK']:
            return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])
        if res['Value']['Failed']:
            errorReason = sorted(set(res['Value']['Failed'].values()))
            self.logWarn("Failed to get sizes for %d files:" % len(res['Value']['Failed']), errorReason)
        fileSizes.update(res['Value']['Successful'])
        self.cachedLFNSize.update((res['Value']['Successful']))
        self.logVerbose("Got size of %d files from catalog" % len(res['Value']['Successful']))
        return S_OK(fileSizes)

    def clearCachedFileSize(self, lfns):
        """ Utility function: drop the given lfns from the size cache """
        for lfn in lfns:
            self.cachedLFNSize.pop(lfn, None)

    def getPluginParam(self, name, default=None):
        """ Get plugin parameters using specific settings or settings defined in the CS
            Caution: the type returned is that of the default value

            Lookup order, the last one found wins: generic CS value
            (TransformationPlugins/<name>), plugin specific CS value
            (TransformationPlugins/<plugin>/<name>), transformation parameter.

            :param str name: parameter name
            :param default: value returned if the parameter is defined nowhere;
                            when not None its type drives the conversion of the result
            :returns: the parameter value
        """
        # get the value of a parameter looking 1st in the CS
        valueType = type(default) if default is not None else None
        opsHelper = Operations()
        # First look at a generic value...
        optionPath = "TransformationPlugins/%s" % (name)
        value = opsHelper.getValue(optionPath, None)
        self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
        # Then look at a plugin-specific value
        optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
        value = opsHelper.getValue(optionPath, value)
        self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
        if value is not None:
            default = value
        # Finally look at a transformation-specific parameter
        value = self.params.get(name, default)
        self.logVerbose(
            "Transformation plugin param %s: '%s'. Convert to %s" %
            (name, value, str(valueType)))
        if valueType and not isinstance(value, valueType):
            if valueType is list:
                try:
                    value = ast.literal_eval(value) if value and value != 'None' else []
                # literal_eval('SE-DST') -> ValueError
                # literal_eval('SE_MC-DST') -> SyntaxError
                # Don't ask...
                except (ValueError, SyntaxError):
                    # Not a python literal: take it as a comma separated list
                    value = [val for val in value.replace(' ', '').split(',') if val]

            elif valueType is int:
                value = int(value)
            elif valueType is float:
                value = float(value)
            elif valueType is bool:
                if value in ('False', 'No', 'None', None, 0):
                    value = False
                else:
                    value = bool(value)
            elif valueType is not str:
                self.logWarn(
                    "Unknown parameter type (%s) for %s, passed as string" %
                    (str(valueType), name))
        self.logVerbose("Final plugin param %s: '%s'" % (name, value))
        return value

    @staticmethod
    def _normaliseShares(originalShares):
        """ Normalize shares to 100 (i.e. convert them to percentages)

            :param dict originalShares: site -> share (anything float() accepts)
            :returns: dict site -> percentage; all 0. if the shares sum up to 0
        """
        total = sum(float(share) for share in originalShares.values())
        return dict((site, 100. * float(share) / total if total else 0.)
                    for site, share in originalShares.items())

    def uniqueSEs(self, ses):
        """ return a list of SEs that are not physically the same, in their original order """
        newSEs = []
        for se in ses:
            if not self.isSameSEInList(se, newSEs):
                newSEs.append(se)
        return newSEs

    def isSameSE(self, se1, se2):
        """ Check if 2 SEs are indeed the same.

            :param se1: name of the first StorageElement
            :param se2: name of the second StorageElement

            :returns: True/False if they are considered the same.
                      See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
        """
        if se1 == se2:
            return True

        return StorageElement(se1).isSameSE(StorageElement(se2))

    def isSameSEInList(self, se1, seList):
        """ Check if an SE is the same as any in a list """
        if se1 in seList:
            return True
        return any(self.isSameSE(se1, se) for se in seList)

    def closerSEs(self, existingSEs, targetSEs, local=False):
        """ Order the targetSEs such that the first ones are closer to existingSEs.

            The result holds first the target SEs at the same site as an existing SE
            (shuffled), then, unless local is True, the other target SEs (shuffled) and
            finally the target SEs that are the same as an existing SE.

            :param existingSEs: SEs already holding the data
            :param targetSEs: candidate SEs
            :param bool local: if True, only return the SEs at the site of an existing SE
            :returns: ordered list of SEs
        """
        setTarget = set(targetSEs)
        # any() stops at the first match instead of comparing with every existing SE
        sameSEs = set(se1 for se1 in setTarget
                      if any(self.isSameSE(se1, se2) for se2 in existingSEs))
        targetSEs = setTarget - sameSEs
        if targetSEs:
            # Some SEs are left, look for sites
            existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value')
                             for se in existingSEs]
            existingSites = set(site for site in existingSites if site)
            closeSEs = set(se for se in targetSEs
                           if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites)
            otherSEs = targetSEs - closeSEs
            targetSEs = list(closeSEs)
            random.shuffle(targetSEs)
            if not local and otherSEs:
                otherSEs = list(otherSEs)
                random.shuffle(otherSEs)
                targetSEs += otherSEs
        else:
            targetSEs = []
        return (targetSEs + list(sameSEs)) if not local else targetSEs
class Request( object ): """ :param int RequestID: requestID :param str Name: request' name :param str OwnerDN: request's owner DN :param str OwnerGroup: request owner group :param str Setup: DIRAC setup :param str SourceComponent: whatever :param int JobID: jobID :param datetime.datetime CreationTime: UTC datetime :param datetime.datetime SubmissionTime: UTC datetime :param datetime.datetime LastUpdate: UTC datetime :param datetime.datetime NotBefore: UTC datetime :param str Status: request's status :param TypedList.TypedList operations: list of operations It is managed by SQLAlchemy, so the RequestID should never be set by hand (except when constructed from JSON of course...) """ ALL_STATES = ( "Waiting", "Failed", "Done", "Scheduled", "Assigned", "Canceled" ) FINAL_STATES = ( "Done", "Failed", "Canceled" ) _datetimeFormat = '%Y-%m-%d %H:%M:%S' def __init__( self, fromDict = None ): """c'tor :param self: self reference :param fromDict : if false, new request. Can be json string that represents the object, or the dictionary directly """ self.__waiting = None now = datetime.datetime.utcnow().replace( microsecond = 0 ) self._CreationTime = now self._SubmitTime = now self._LastUpdate = now # the time before which the request should not be executed # If None, no delay self._NotBefore = now self._Status = "Done" self.JobID = 0 self.Error = None self.DIRACSetup = None self.OwnerDN = None self.RequestName = None self.OwnerGroup = None self.SourceComponent = None self.dmsHelper = DMSHelpers() proxyInfo = getProxyInfo() if proxyInfo["OK"]: proxyInfo = proxyInfo["Value"] if proxyInfo["validGroup"] and proxyInfo["validDN"]: self.OwnerDN = proxyInfo["identity"] self.OwnerGroup = proxyInfo["group"] self.__operations__ = [] fromDict = fromDict if isinstance( fromDict, dict )\ else json.loads( fromDict ) if isinstance( fromDict, StringTypes )\ else {} if "Operations" in fromDict: for opDict in fromDict.get( "Operations", [] ): self +=Operation( opDict ) del 
fromDict["Operations"] for key, value in fromDict.items(): # The JSON module forces the use of UTF-8, which is not properly # taken into account in DIRAC. # One would need to replace all the '== str' with 'in StringTypes' if type( value ) in StringTypes: value = value.encode() if value: setattr( self, key, value ) self._notify() def _notify( self ): """ simple state machine for sub request statuses """ # # update operations statuses self.__waiting = None # Update the Order in Operation, and set the parent for i in range( len( self.__operations__ ) ): self.__operations__[i].Order = i self.__operations__[i]._parent = self rStatus = "Waiting" opStatusList = [ ( op.Status, op ) for op in self ] while opStatusList: # # Scan all status in order! opStatus, op = opStatusList.pop( 0 ) # # Failed -> Failed if opStatus == "Failed" and self.__waiting is None: rStatus = "Failed" break # Scheduled -> Scheduled if opStatus == "Scheduled": if self.__waiting == None: self.__waiting = op rStatus = "Scheduled" # # First operation Queued becomes Waiting if no Waiting/Scheduled before elif opStatus == "Queued": if self.__waiting == None: self.__waiting = op op._setWaiting( self ) rStatus = "Waiting" # # First operation Waiting is next to execute, others are queued elif opStatus == "Waiting": rStatus = "Waiting" if self.__waiting == None: self.__waiting = op else: op._setQueued( self ) # # All operations Done -> Done elif opStatus == "Done" and self.__waiting == None: rStatus = "Done" self.Error = '' self.Status = rStatus def getWaiting( self ): """ get waiting operation if any """ # # update states self._notify() return S_OK( self.__waiting ) # # Operation arithmetics def __contains__( self, operation ): """ in operator :param self: self reference :param Operation.Operation subRequest: a subRequest """ return bool( operation in self.__operations__ ) def __iadd__( self, operation ): """ += operator for subRequest :param self: self reference :param Operation.Operation operation: 
sub-request to add """ if operation not in self: self.__operations__.append( operation ) operation._parent = self self._notify() return self def insertBefore( self, newOperation, existingOperation ): """ insert :newOperation: just before :existingOperation: :param self: self reference :param Operation.Operation newOperation: Operation to be inserted :param Operation.Operation existingOperation: previous Operation sibling """ if existingOperation not in self: return S_ERROR( "%s is not in" % existingOperation ) if newOperation in self: return S_ERROR( "%s is already in" % newOperation ) self.__operations__.insert( self.__operations__.index( existingOperation ), newOperation ) self._notify() return S_OK() def insertAfter( self, newOperation, existingOperation ): """ insert :newOperation: just after :existingOperation: :param self: self reference :param Operation.Operation newOperation: Operation to be inserted :param Operation.Operation existingOperation: next Operation sibling """ if existingOperation not in self: return S_ERROR( "%s is not in" % existingOperation ) if newOperation in self: return S_ERROR( "%s is already in" % newOperation ) self.__operations__.insert( self.__operations__.index( existingOperation ) + 1, newOperation ) self._notify() return S_OK() def addOperation( self, operation ): """ add :operation: to list of Operations :param self: self reference :param Operation.Operation operation: Operation to be inserted """ if operation in self: return S_ERROR( "This operation is already in!!!" 
) self +=operation return S_OK() def isEmpty( self ): """ Evaluate if the request is empty """ return len( self.__operations__ ) == 0 def __iter__( self ): """ iterator for sub-request """ return self.__operations__.__iter__() def __getitem__( self, i ): """ [] op for sub requests """ return self.__operations__.__getitem__( i ) def __setitem__( self, i, value ): """ self[i] = val """ self.__operations__.__setitem__( i, value ) self._notify() def __delitem__( self, i ): """ del self[i]""" self.__operations__.__delitem__( i ) self._notify() def indexOf( self, subReq ): """ return index of subReq (execution order) """ return self.__operations__.index( subReq ) if subReq in self else -1 def __nonzero__( self ): """ for comparisons """ return True def __len__( self ): """ nb of subRequests """ return len( self.__operations__ ) def __str__( self ): """ str operator """ return self.toJSON()['Value'] def subStatusList( self ): """ list of statuses for all operations """ return [ subReq.Status for subReq in self ] @property def CreationTime( self ): """ creation time getter """ return self._CreationTime @CreationTime.setter def CreationTime( self, value = None ): """ creation time setter """ if type( value ) not in ( [datetime.datetime] + list( StringTypes ) ) : raise TypeError( "CreationTime should be a datetime.datetime!" ) if type( value ) in StringTypes: value = datetime.datetime.strptime( value.split( "." )[0], self._datetimeFormat ) self._CreationTime = value @property def SubmitTime( self ): """ request's submission time getter """ return self._SubmitTime @SubmitTime.setter def SubmitTime( self, value = None ): """ submission time setter """ if type( value ) not in ( [datetime.datetime] + list( StringTypes ) ): raise TypeError( "SubmitTime should be a datetime.datetime!" ) if type( value ) in StringTypes: value = datetime.datetime.strptime( value.split( "." 
)[0], self._datetimeFormat ) self._SubmitTime = value @property def NotBefore( self ): """ Getter for NotBefore time""" return self._NotBefore @NotBefore.setter def NotBefore( self, value = None ): """ Setter for the NotBefore time """ if type( value ) not in ( [NoneType] + [datetime.datetime] + list( StringTypes ) ): raise TypeError( "NotBefore should be a datetime.datetime!" ) if type( value ) in StringTypes: value = datetime.datetime.strptime( value.split( "." )[0], self._datetimeFormat ) self._NotBefore = value def delayNextExecution( self, deltaTime ): """This helper sets the NotBefore attribute in deltaTime minutes in the future :param deltaTime : time in minutes before next execution """ now = datetime.datetime.utcnow().replace( microsecond = 0 ) extraDelay = datetime.timedelta( minutes = deltaTime ) self._NotBefore = now + extraDelay return S_OK() @property def LastUpdate( self ): """ last update getter """ return self._LastUpdate @LastUpdate.setter def LastUpdate( self, value = None ): """ last update setter """ if type( value ) not in ( [datetime.datetime] + list( StringTypes ) ): raise TypeError( "LastUpdate should be a datetime.datetime!" ) if type( value ) in StringTypes: value = datetime.datetime.strptime( value.split( "." 
)[0], self._datetimeFormat ) self._LastUpdate = value @property def Status( self ): """ status getter """ self._notify() return self._Status @Status.setter def Status( self, value ): """ status setter """ if value not in Request.ALL_STATES: raise ValueError( "Unknown status: %s" % str( value ) ) # If the status moved to Failed or Done, update the lastUpdate time if value in ( 'Done', 'Failed' ): if value != self._Status: self.LastUpdate = datetime.datetime.utcnow().replace( microsecond = 0 ) if value == 'Done': self.Error = '' self._Status = value @property def Order( self ): """ ro execution order getter """ self._notify() opStatuses = [ op.Status for op in self.__operations__ ] return opStatuses.index( "Waiting" ) if "Waiting" in opStatuses else len( opStatuses ) def toJSON( self ): """ Returns the JSON formated string that describes the request """ jsonStr = json.dumps( self, cls = RMSEncoder ) return S_OK( jsonStr ) def _getJSONData( self ): """ Returns the data that have to be serialized by JSON """ attrNames = ['RequestID', "RequestName", "OwnerDN", "OwnerGroup", "Status", "Error", "DIRACSetup", "SourceComponent", "JobID", "CreationTime", "SubmitTime", "LastUpdate", "NotBefore"] jsonData = {} for attrName in attrNames : # RequestID might not be set since it is managed by SQLAlchemy if not hasattr( self, attrName ): continue value = getattr( self, attrName ) if isinstance( value, datetime.datetime ): # We convert date time to a string jsonData[attrName] = value.strftime( self._datetimeFormat ) else: jsonData[attrName] = value jsonData['Operations'] = self.__operations__ return jsonData def getDigest( self ): """ return digest for request """ digest = ['Name:' + self.RequestName] for op in self: opDigest = [ str( item ) for item in ( op.Type, op.Type, op.Status, op.Order ) ] if op.TargetSE: opDigest.append( op.TargetSE ) if op.Catalog: opDigest.append( op.Catalog ) if len( op ): opFile = op[0] extraFilesStr = "...+<%d files>" % ( len( op ) - 1 ) if (len(op) > 1 
) else '' opDigest.append( opFile.LFN + extraFilesStr ) digest.append( ":".join( opDigest ) ) return S_OK( "\n".join( digest ) ) def optimize( self ): """ Merges together the operations that can be merged. They need to have the following arguments equal: * Type * Arguments * SourceSE * TargetSE * Catalog It also makes sure that the maximum number of Files in an Operation is never overcome. CAUTION: this method is meant to be called before inserting into the DB. So if the RequestID is not 0, we don't touch :return: S_ERROR if the Request should not be optimized (because already in the DB) S_OK(True) if a optimization was carried out S_OK(False) if no optimization were carried out """ # If the RequestID is not the default one (0), it probably means # the Request is already in the DB, so we don't touch anything if hasattr( self, 'RequestID' ) and getattr( self, 'RequestID' ): return S_ERROR( "Cannot optimize because Request seems to be already in the DB (RequestID %s)" % getattr( self, 'RequestID' ) ) # Set to True if the request could be optimized optimized = False # Recognise Failover request series repAndRegList = [] removeRepList = [] i = 0 while i < len( self.__operations__ ): insertNow = True if i < len( self.__operations__ ) - 1: op1 = self.__operations__[i] op2 = self.__operations__[i + 1] if getattr( op1, 'Type' ) == 'ReplicateAndRegister' and \ getattr( op2, 'Type' ) == 'RemoveReplica': fileSetA = set( list( f.LFN for f in op1 ) ) fileSetB = set( list( f.LFN for f in op2 ) ) if fileSetA == fileSetB: # Source is useless if failover if self.dmsHelper.isSEFailover( op1.SourceSE ): op1.SourceSE = '' repAndRegList.append( ( op1.TargetSE, op1 ) ) removeRepList.append( ( op2.TargetSE, op2 ) ) del self.__operations__[i] del self.__operations__[i] # If we are at the end of the request, we must insert the new operations insertNow = ( i == len( self.__operations__ ) ) # print i, self.__operations__[i].Type if i < len( self.__operations__ ) else None, len( repAndRegList 
), insertNow if insertNow: if repAndRegList: # We must insert the new operations there # i.e. insert before operation i (if it exists) # Replication first, removeReplica next optimized = True insertBefore = self.__operations__[i] if i < len( self.__operations__ ) else None # print 'Insert new operations before', insertBefore for op in \ [op for _targetSE, op in sorted( repAndRegList )] + \ [op for _targetSE, op in sorted( removeRepList )]: _res = self.insertBefore( op, insertBefore ) if insertBefore else self.addOperation( op ) # Skip the newly inserted operation i += 1 repAndRegList = [] removeRepList = [] else: # Skip current operation i += 1 else: # Just to show that in that case we don't increment i pass # List of attributes that must be equal for operations to be merged attrList = ["Type", "Arguments", "SourceSE", "TargetSE", "Catalog" ] i = 0 while i < len( self.__operations__ ): while i < len( self.__operations__ ) - 1: # Some attributes need to be the same attrMismatch = False for attr in attrList: if getattr( self.__operations__[i], attr ) != getattr( self.__operations__[i + 1], attr ): attrMismatch = True break if attrMismatch: break # We do not do the merge if there are common files in the operations fileSetA = set( list( f.LFN for f in self.__operations__[i] ) ) fileSetB = set( list( f.LFN for f in self.__operations__[i + 1] ) ) if fileSetA & fileSetB: break # There is a maximum number of files one can add into an operation try: while len( self.__operations__[i + 1] ): fileToMove = self.__operations__[i + 1][0] self.__operations__[i] += fileToMove # If the object is mapped to SQLAlchemy object with a relationship # having the delete-orphan option, the fileToMove will have # already disappeared from the original operation. Silly... 
# If not, we have to remove it manually if len( self.__operations__[i + 1] )\ and ( self.__operations__[i + 1][0] == fileToMove ): del self.__operations__[i + 1][0] optimized = True del self.__operations__[i + 1] except RuntimeError: i += 1 i += 1 return S_OK( optimized )
def getFilesToStage(lfnList, jobState=None, checkOnlyTapeSEs=None, jobLog=None):
    """ Utility that returns out of a list of LFNs those files that are offline,
        and those for which at least one copy is online

        :param lfnList: LFN (string) or list of LFNs to check
        :param jobState: optional object providing getAttribute(); when given, its 'Owner' and
                         'OwnerGroup' attributes are passed to _checkFilesToStage as proxy user and group
        :param checkOnlyTapeSEs: passed unchanged to _checkFilesToStage
        :param jobLog: passed unchanged to _checkFilesToStage

        :return: S_ERROR if the replicas cannot be obtained or the files cannot be checked,
                 otherwise S_OK(dict) with keys:

                 * 'onlineLFNs': list of the LFNs reported online
                 * 'offlineLFNs': dict {SE: [LFNs]} giving the SE selected for each offline LFN
                 * 'failedLFNs': list of the requested LFNs that are neither online, offline nor absent
                 * 'absentLFNs': dict {LFN: reason}
                 * 'onlineSites': set of sites of the SEs holding online files
                   (not present when returning early for an empty list or for absent files)
    """
    if not lfnList:
        return S_OK({'onlineLFNs': [], 'offlineLFNs': {}, 'failedLFNs': [], 'absentLFNs': {}})

    dm = DataManager()
    if isinstance(lfnList, basestring):
        lfnList = [lfnList]

    lfnListReplicas = dm.getReplicasForJobs(lfnList, getUrl=False)
    if not lfnListReplicas['OK']:
        return lfnListReplicas

    offlineLFNsDict = {}
    onlineLFNs = {}
    offlineLFNs = {}
    absentLFNs = {}
    failedLFNs = set()
    # Must exist even when no replica is found at all, as it is part of the returned value
    onlineSites = set()
    if lfnListReplicas['Value']['Failed']:
        # Check if files are not existing
        for lfn, reason in lfnListReplicas['Value']['Failed'].items():
            # FIXME: awful check until FC returns a proper error
            if cmpError(reason, errno.ENOENT) or 'No such file' in reason:
                # The file doesn't exist, job must be Failed
                # FIXME: it is not possible to return here an S_ERROR(), return the message only
                absentLFNs[lfn] = S_ERROR(errno.ENOENT, 'File not in FC')['Message']
        if absentLFNs:
            return S_OK({'onlineLFNs': list(onlineLFNs),
                         'offlineLFNs': offlineLFNsDict,
                         'failedLFNs': list(failedLFNs),
                         'absentLFNs': absentLFNs})
        return S_ERROR("Failures in getting replicas")

    lfnListReplicas = lfnListReplicas['Value']['Successful']
    # If a file is reported here at a tape SE, it is not at a disk SE as we use disk in priority
    # We shall check all file anyway in order to make sure they exist
    seToLFNs = dict()
    for lfn, ses in lfnListReplicas.items():
        for se in ses:
            seToLFNs.setdefault(se, list()).append(lfn)

    if seToLFNs:
        if jobState:
            # Get user name and group from the job state
            userName = jobState.getAttribute('Owner')
            if not userName['OK']:
                return userName
            userName = userName['Value']

            userGroup = jobState.getAttribute('OwnerGroup')
            if not userGroup['OK']:
                return userGroup
            userGroup = userGroup['Value']
        else:
            userName = None
            userGroup = None

        # Check whether files are Online or Offline, or missing at SE
        result = _checkFilesToStage(seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,  # pylint: disable=unexpected-keyword-arg
                                    checkOnlyTapeSEs=checkOnlyTapeSEs, jobLog=jobLog,
                                    proxyUserName=userName,
                                    proxyUserGroup=userGroup,
                                    executionLock=True)
        if not result['OK']:
            return result

        # Get the online SEs
        dmsHelper = DMSHelpers()
        onlineSEs = set(se for ses in onlineLFNs.values() for se in ses)
        onlineSites = set(dmsHelper.getLocalSiteForSE(se).get('Value') for se in onlineSEs) - {None}
        for lfn, ses in offlineLFNs.items():
            if len(ses) == 1:
                # No choice, let's go
                offlineLFNsDict.setdefault(ses[0], list()).append(lfn)
                continue
            # Try and get an SE at a site already with online files
            found = False
            if onlineSites:
                # If there is at least one online site, select one
                for se in ses:
                    site = dmsHelper.getLocalSiteForSE(se)
                    if site['OK'] and site['Value'] in onlineSites:
                        offlineLFNsDict.setdefault(se, list()).append(lfn)
                        found = True
                        break
            # No online site found in common, select randomly
            if not found:
                offlineLFNsDict.setdefault(random.choice(ses), list()).append(lfn)

    # Whatever was requested but is neither online, offline nor absent is failed;
    # this includes the case where no LFN has any replica (seToLFNs empty)
    failedLFNs = set(lfnList) - set(onlineLFNs) - set(offlineLFNs) - set(absentLFNs)

    return S_OK({'onlineLFNs': list(onlineLFNs),
                 'offlineLFNs': offlineLFNsDict,
                 'failedLFNs': list(failedLFNs),
                 'absentLFNs': absentLFNs,
                 'onlineSites': onlineSites})