def loookup_replicas(files, protocol=None):
    """Populate ``f.replicas`` for every file object in *files*.

    For each LFN the replica SEs are looked up through the DataManager, then
    each SE is asked for the access URL (PFN) with the requested protocols.
    A ``Replica`` entry is appended to ``files_map[lfn].replicas`` for every
    (lfn, SE) pair: with a ``pfn`` on success, with ``error`` on failure, or
    with ``banned=True`` for SEs in the (currently empty) ban list.

    :param files: iterable of objects with ``.lfn`` and a mutable ``.replicas`` list
    :param protocol: ordered list of access protocols to request
                     (defaults to ['xroot', 'root'])
    :raises AssertionError: if any of the files is of BK file type 'MDF'
    """
    # NOTE: a mutable default argument would be shared across calls, so the
    # default protocol list is built fresh on each invocation.
    if protocol is None:
        protocol = ['xroot', 'root']
    from DIRAC.DataManagementSystem.Client.DataManager import DataManager
    from DIRAC.Resources.Storage.StorageElement import StorageElement
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    dm = DataManager()
    bk = BookkeepingClient()
    files_map = {f.lfn: f for f in files}
    res = dm.getReplicas([f.lfn for f in files], getUrl=False)
    replicas = res.get('Value', {}).get('Successful', {})
    # All SEs holding at least one of the requested files
    seList = sorted(set(se for f in files for se in replicas.get(f.lfn, {})))
    banned_SE_list = []
    print('Found SE list of', seList)
    # Check if files are MDF: MDF files are not supported by this lookup
    bkRes = bk.getFileTypeVersion([f.lfn for f in files])
    assert not set(lfn for lfn, fileType in bkRes.get('Value', {}).iteritems() if fileType == 'MDF')
    for se in seList:
        # TODO Check if SEs are available
        lfns = [f.lfn for f in files if se in replicas.get(f.lfn, {})]
        if se in banned_SE_list:
            print('Skipping banned SE', se)
            for lfn in lfns:
                files_map[lfn].replicas.append(Replica(lfn, se, banned=True))
            continue
        else:
            print('Looking up replicas for', len(lfns), 'files at', se)
        if lfns:
            res = StorageElement(se).getURL(lfns, protocol=protocol)
            if res['OK']:
                for lfn, pfn in res['Value']['Successful'].items():
                    files_map[lfn].replicas.append(Replica(lfn, se, pfn=pfn))
                for lfn in res['Value']['Failed']:
                    # Record the per-LFN failure reason, not the whole result dict
                    files_map[lfn].replicas.append(
                        Replica(lfn, se, error=res['Value']['Failed'][lfn]))
            else:
                print('LFN -> PFN lookup failed for', se, 'with error:', res['Message'])
                for lfn in lfns:
                    files_map[lfn].replicas.append(
                        Replica(lfn, se, error=res['Message']))
def filterReplicas( opFile, logger = None, dataManager = None ):
  """ filter out banned/invalid source SEs

  Classify every replica of opFile.LFN into SE-name lists keyed by
  'Valid', 'NoMetadata', 'Bad', 'NoReplicas' and 'NoPFN'.  May update
  opFile.Checksum / opFile.ChecksumType / opFile.Status / opFile.Error
  in place.

  :param opFile: request-operation file object (read and mutated)
  :param logger: optional logger, defaults to gLogger
  :param dataManager: optional DataManager, a fresh one is created if None
  :return: S_OK( dict of lists ) on success, otherwise the failed result
           or S_ERROR when the catalog reports the file as non-existent
  """
  if logger is None:
    logger = gLogger
  if dataManager is None:
    dataManager = DataManager()
  log = logger.getSubLogger( "filterReplicas" )
  ret = { "Valid" : [], "NoMetadata" : [], "Bad" : [], 'NoReplicas':[], 'NoPFN':[] }
  replicas = dataManager.getActiveReplicas( opFile.LFN )
  if not replicas["OK"]:
    log.error( 'Failed to get active replicas', replicas["Message"] )
    return replicas
  reNotExists = re.compile( r".*such file.*" )
  replicas = replicas["Value"]
  failed = replicas["Failed"].get( opFile.LFN , "" )
  # A "no such file" failure from the catalog is terminal for this file
  if reNotExists.match( failed.lower() ):
    opFile.Status = "Failed"
    opFile.Error = failed
    return S_ERROR( failed )
  replicas = replicas["Successful"].get( opFile.LFN, {} )
  noReplicas = False
  if not replicas:
    # No active replica: fall back to all (possibly inactive) replicas
    allReplicas = dataManager.getReplicas( opFile.LFN )
    if allReplicas['OK']:
      allReplicas = allReplicas['Value']['Successful'].get( opFile.LFN, {} )
      if not allReplicas:
        ret['NoReplicas'].append( None )
        noReplicas = True
      else:
        # We try inactive replicas to see if maybe the file doesn't exist at all
        replicas = allReplicas
      log.warn( "File has no%s replica in File Catalog" % ( '' if noReplicas else ' active' ), opFile.LFN )
    else:
      return allReplicas

  if not opFile.Checksum:
    # Set Checksum to FC checksum if not set in the request
    fcMetadata = FileCatalog().getFileMetadata( opFile.LFN )
    fcChecksum = fcMetadata.get( 'Value', {} ).get( 'Successful', {} ).get( opFile.LFN, {} ).get( 'Checksum' )
    # Replace opFile.Checksum if it doesn't match a valid FC checksum
    if fcChecksum:
      opFile.Checksum = fcChecksum
      opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get( 'ChecksumType', 'Adler32' )

  # Interrogate each SE holding a replica for its file metadata
  for repSEName in replicas:
    repSEMetadata = StorageElement( repSEName ).getFileMetadata( opFile.LFN )
    # Either an overall failure ('Message') or a per-LFN failure entry
    error = repSEMetadata.get( 'Message', repSEMetadata.get( 'Value', {} ).get( 'Failed', {} ).get( opFile.LFN ) )
    if error:
      log.warn( 'unable to get metadata at %s for %s' % ( repSEName, opFile.LFN ), error.replace( '\n', '' ) )
      if 'File does not exist' in error:
        ret['NoReplicas'].append( repSEName )
      else:
        ret["NoMetadata"].append( repSEName )
    elif not noReplicas:
      repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]
      seChecksum = repSEMetadata.get( "Checksum" )
      if not seChecksum and opFile.Checksum:
        # SE has no checksum: drop the request checksum so no comparison is made
        opFile.Checksum = None
        opFile.ChecksumType = None
      elif seChecksum and not opFile.Checksum:
        # Adopt the SE checksum when the request has none
        opFile.Checksum = seChecksum
      if not opFile.Checksum or not seChecksum or compareAdler( seChecksum, opFile.Checksum ):
        # # All checksums are OK
        ret["Valid"].append( repSEName )
      else:
        log.warn( " %s checksum mismatch, FC: '%s' @%s: '%s'" % ( opFile.LFN, opFile.Checksum, repSEName, seChecksum ) )
        ret["Bad"].append( repSEName )
    else:
      # If a replica was found somewhere, don't set the file as no replicas
      ret['NoReplicas'] = []
  return S_OK( ret )
def filterReplicas(opFile, logger=None, dataManager=None):
    """ filter out banned/invalid source SEs

    Classify every replica of ``opFile.LFN`` into SE-name lists keyed by
    'Valid', 'NoMetadata', 'Bad', 'NoReplicas', 'NoActiveReplicas'.
    May update ``opFile.Checksum`` / ``opFile.ChecksumType`` /
    ``opFile.Status`` / ``opFile.Error`` in place.

    :param opFile: request-operation file object (read and mutated)
    :param logger: optional logger, defaults to gLogger
    :param dataManager: optional DataManager, a fresh one is created if None
    :return: S_OK(dict of lists) on success, otherwise the failed result
             or S_ERROR when the catalog reports the file as non-existent
    """
    if logger is None:
        logger = gLogger
    if dataManager is None:
        dataManager = DataManager()
    log = logger.getSubLogger("filterReplicas")
    # defaultdict(list) avoids pre-declaring every category key
    result = defaultdict(list)
    replicas = dataManager.getActiveReplicas(opFile.LFN, getUrl=False)
    if not replicas["OK"]:
        log.error('Failed to get active replicas', replicas["Message"])
        return replicas
    reNotExists = re.compile(r".*such file.*")
    replicas = replicas["Value"]
    failed = replicas["Failed"].get(opFile.LFN, "")
    # A "no such file" failure from the catalog is terminal for this file
    if reNotExists.match(failed.lower()):
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR(failed)
    replicas = replicas["Successful"].get(opFile.LFN, {})
    noReplicas = False
    if not replicas:
        # No active replica: check whether any (inactive) replica exists at all
        allReplicas = dataManager.getReplicas(opFile.LFN, getUrl=False)
        if allReplicas['OK']:
            allReplicas = allReplicas['Value']['Successful'].get(
                opFile.LFN, {})
            if not allReplicas:
                result['NoReplicas'].append(None)
                noReplicas = True
            else:
                # There are replicas but we cannot get metadata because the replica is not active
                result['NoActiveReplicas'] += list(allReplicas)
            log.verbose(
                "File has no%s replica in File Catalog" % ('' if noReplicas else ' active'), opFile.LFN)
        else:
            return allReplicas

    # Repair the request checksum when absent or not a valid Adler-32 hex string
    if not opFile.Checksum or hexAdlerToInt(opFile.Checksum) is False:
        # Set Checksum to FC checksum if not set in the request
        fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
        fcChecksum = fcMetadata.get('Value', {}).get('Successful',
                                                     {}).get(opFile.LFN, {}).get('Checksum')
        # Replace opFile.Checksum if it doesn't match a valid FC checksum
        if fcChecksum:
            if hexAdlerToInt(fcChecksum) is not False:
                opFile.Checksum = fcChecksum
                opFile.ChecksumType = fcMetadata['Value']['Successful'][
                    opFile.LFN].get('ChecksumType', 'Adler32')
            else:
                opFile.Checksum = None

    # If no replica was found, return what we collected as information
    if not replicas:
        return S_OK(result)

    # Interrogate each SE holding a replica for its file metadata
    for repSEName in replicas:
        repSEMetadata = StorageElement(repSEName).getFileMetadata(opFile.LFN)
        # Either an overall failure ('Message') or a per-LFN failure entry
        error = repSEMetadata.get(
            'Message',
            repSEMetadata.get('Value', {}).get('Failed', {}).get(opFile.LFN))
        if error:
            log.warn(
                'unable to get metadata at %s for %s' % (repSEName, opFile.LFN), error.replace('\n', ''))
            if 'File does not exist' in error:
                result['NoReplicas'].append(repSEName)
            else:
                result["NoMetadata"].append(repSEName)
        elif not noReplicas:
            repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]
            seChecksum = hexAdlerToInt(repSEMetadata.get("Checksum"))
            # As from here seChecksum is an integer or False, not a hex string!
            if seChecksum is False and opFile.Checksum:
                # NOTE(review): execution falls through to the comparison below,
                # where "not seChecksum" also marks this SE Valid -- confirm intended
                result['NoMetadata'].append(repSEName)
            elif not seChecksum and opFile.Checksum:
                opFile.Checksum = None
                opFile.ChecksumType = None
            elif seChecksum and (not opFile.Checksum or opFile.Checksum == 'False'):
                # Use the SE checksum (convert to hex) and force type to be Adler32
                opFile.Checksum = intAdlerToHex(seChecksum)
                opFile.ChecksumType = 'Adler32'
            if not opFile.Checksum or not seChecksum or compareAdler(
                    intAdlerToHex(seChecksum), opFile.Checksum):
                # # All checksums are OK
                result["Valid"].append(repSEName)
            else:
                log.warn(" %s checksum mismatch, FC: '%s' @%s: '%s'" %
                         (opFile.LFN, opFile.Checksum, repSEName, intAdlerToHex(seChecksum)))
                result["Bad"].append(repSEName)
        else:
            # If a replica was found somewhere, don't set the file as no replicas
            result['NoReplicas'] = []
    return S_OK(result)
gLogger.always("Couldn't find SEs for site %s" % site) continue seList = res['Value'] inputData = sorted(filesAtSite[site]) if verbose: gLogger.always("%sSite: %s, jobs: %s, %d files" % (sep, site, ','.join(jobs), len(inputData))) else: gLogger.always("%sSite: %s, %d jobs, %d files" % (sep, site, len(jobs), len(inputData))) sep = '=====================================\n' if verbose: gLogger.always('For %s, SEs: %s' % (site, str(seList))) pbFound = False res = dm.getReplicas(inputData) if not res['OK']: gLogger.always( "Error getting replicas for %d files" % len(inputData), res['Message']) continue replicas = res['Value']['Successful'] notInFC = res['Value']['Failed'] if notInFC: # Check if files has replica flag in the FC, If not ignore the problem res = bk.getFileMetadata(notInFC.keys()) if not res['OK']: gLogger.always( 'Error getting BK metadata for %d files' % len(notInFC), res['Message']) continue
def filterReplicas(opFile, logger=None, dataManager=None):
    """ filter out banned/invalid source SEs

    Classify every replica of ``opFile.LFN`` into SE-name lists keyed by
    'Valid', 'NoMetadata', 'Bad', 'NoReplicas', 'NoActiveReplicas'.
    May update ``opFile.Checksum`` / ``opFile.ChecksumType`` /
    ``opFile.Status`` / ``opFile.Error`` in place.

    :param opFile: request-operation file object (read and mutated)
    :param logger: optional logger, defaults to gLogger
    :param dataManager: optional DataManager, a fresh one is created if None
    :return: S_OK(dict of lists) on success, otherwise the failed result
             or S_ERROR when the catalog reports the file as non-existent
    """
    if logger is None:
        logger = gLogger
    if dataManager is None:
        dataManager = DataManager()
    log = logger.getSubLogger("filterReplicas")
    # defaultdict(list) avoids pre-declaring every category key
    result = defaultdict(list)
    replicas = dataManager.getActiveReplicas(opFile.LFN, getUrl=False)
    if not replicas["OK"]:
        log.error('Failed to get active replicas', replicas["Message"])
        return replicas
    reNotExists = re.compile(r".*such file.*")
    replicas = replicas["Value"]
    failed = replicas["Failed"].get(opFile.LFN, "")
    # A "no such file" failure from the catalog is terminal for this file
    if reNotExists.match(failed.lower()):
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR(failed)
    replicas = replicas["Successful"].get(opFile.LFN, {})
    noReplicas = False
    if not replicas:
        # No active replica: check whether any (inactive) replica exists at all
        allReplicas = dataManager.getReplicas(opFile.LFN, getUrl=False)
        if allReplicas['OK']:
            allReplicas = allReplicas['Value']['Successful'].get(opFile.LFN, {})
            if not allReplicas:
                result['NoReplicas'].append(None)
                noReplicas = True
            else:
                # There are replicas but we cannot get metadata because the replica is not active
                result['NoActiveReplicas'] += list(allReplicas)
            log.verbose("File has no%s replica in File Catalog" % ('' if noReplicas else ' active'), opFile.LFN)
        else:
            return allReplicas

    # Repair the request checksum when absent or not a valid Adler-32 hex string
    if not opFile.Checksum or hexAdlerToInt(opFile.Checksum) is False:
        # Set Checksum to FC checksum if not set in the request
        fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
        fcChecksum = fcMetadata.get(
            'Value', {}).get(
            'Successful', {}).get(
            opFile.LFN, {}).get('Checksum')
        # Replace opFile.Checksum if it doesn't match a valid FC checksum
        if fcChecksum:
            if hexAdlerToInt(fcChecksum) is not False:
                opFile.Checksum = fcChecksum
                opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get('ChecksumType', 'Adler32')
            else:
                opFile.Checksum = None

    # If no replica was found, return what we collected as information
    if not replicas:
        return S_OK(result)

    # Interrogate each SE holding a replica for its file metadata
    for repSEName in replicas:
        repSEMetadata = StorageElement(repSEName).getFileMetadata(opFile.LFN)
        # Either an overall failure ('Message') or a per-LFN failure entry
        error = repSEMetadata.get('Message',
                                  repSEMetadata.get('Value', {}).get('Failed', {}).get(opFile.LFN))
        if error:
            log.warn('unable to get metadata at %s for %s' % (repSEName, opFile.LFN),
                     error.replace('\n', ''))
            if 'File does not exist' in error:
                result['NoReplicas'].append(repSEName)
            else:
                result["NoMetadata"].append(repSEName)
        elif not noReplicas:
            repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]
            seChecksum = hexAdlerToInt(repSEMetadata.get("Checksum"))
            # As from here seChecksum is an integer or False, not a hex string!
            if seChecksum is False and opFile.Checksum:
                # NOTE(review): execution falls through to the comparison below,
                # where "not seChecksum" also marks this SE Valid -- confirm intended
                result['NoMetadata'].append(repSEName)
            elif not seChecksum and opFile.Checksum:
                opFile.Checksum = None
                opFile.ChecksumType = None
            elif seChecksum and (not opFile.Checksum or opFile.Checksum == 'False'):
                # Use the SE checksum (convert to hex) and force type to be Adler32
                opFile.Checksum = intAdlerToHex(seChecksum)
                opFile.ChecksumType = 'Adler32'
            if not opFile.Checksum or not seChecksum or compareAdler(
                    intAdlerToHex(seChecksum), opFile.Checksum):
                # # All checksums are OK
                result["Valid"].append(repSEName)
            else:
                log.warn(" %s checksum mismatch, FC: '%s' @%s: '%s'" %
                         (opFile.LFN, opFile.Checksum, repSEName, intAdlerToHex(seChecksum)))
                result["Bad"].append(repSEName)
        else:
            # If a replica was found somewhere, don't set the file as no replicas
            result['NoReplicas'] = []
    return S_OK(result)
def filterReplicas(opFile, logger=None, dataManager=None):
    """filter out banned/invalid source SEs

    Classify every replica of ``opFile.LFN`` into SE-name lists keyed by
    "Valid", "NoMetadata", "Bad", "NoReplicas" and "NoPFN".  May update
    ``opFile.Checksum`` / ``opFile.ChecksumType`` / ``opFile.Status`` /
    ``opFile.Error`` in place.

    :param opFile: request-operation file object (read and mutated)
    :param logger: optional logger, defaults to gLogger
    :param dataManager: optional DataManager, a fresh one is created if None
    :return: S_OK(dict of lists) on success, otherwise the failed result
             or S_ERROR when the catalog reports the file as non-existent
    """
    if logger is None:
        logger = gLogger
    if dataManager is None:
        dataManager = DataManager()
    log = logger.getSubLogger("filterReplicas")
    ret = {"Valid": [], "NoMetadata": [], "Bad": [], "NoReplicas": [], "NoPFN": []}
    replicas = dataManager.getActiveReplicas(opFile.LFN)
    if not replicas["OK"]:
        log.error("Failed to get active replicas", replicas["Message"])
        return replicas
    reNotExists = re.compile(r".*such file.*")
    replicas = replicas["Value"]
    failed = replicas["Failed"].get(opFile.LFN, "")
    # A "no such file" failure from the catalog is terminal for this file
    if reNotExists.match(failed.lower()):
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR(failed)
    replicas = replicas["Successful"].get(opFile.LFN, {})
    noReplicas = False
    if not replicas:
        # No active replica: fall back to all (possibly inactive) replicas
        allReplicas = dataManager.getReplicas(opFile.LFN)
        if allReplicas["OK"]:
            allReplicas = allReplicas["Value"]["Successful"].get(opFile.LFN, {})
            if not allReplicas:
                ret["NoReplicas"].append(None)
                noReplicas = True
            else:
                # We try inactive replicas to see if maybe the file doesn't exist at all
                replicas = allReplicas
            log.warn("File has no%s replica in File Catalog" % ("" if noReplicas else " active"), opFile.LFN)
        else:
            return allReplicas

    if not opFile.Checksum:
        # Set Checksum to FC checksum if not set in the request
        fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
        fcChecksum = fcMetadata.get("Value", {}).get("Successful", {}).get(opFile.LFN, {}).get("Checksum")
        # Replace opFile.Checksum if it doesn't match a valid FC checksum
        if fcChecksum:
            opFile.Checksum = fcChecksum
            opFile.ChecksumType = fcMetadata["Value"]["Successful"][opFile.LFN].get("ChecksumType", "Adler32")

    # Interrogate each SE holding a replica for its file metadata
    for repSEName in replicas:
        repSEMetadata = StorageElement(repSEName).getFileMetadata(opFile.LFN)
        # Either an overall failure ("Message") or a per-LFN failure entry
        error = repSEMetadata.get("Message", repSEMetadata.get("Value", {}).get("Failed", {}).get(opFile.LFN))
        if error:
            log.warn("unable to get metadata at %s for %s" % (repSEName, opFile.LFN), error.replace("\n", ""))
            if "File does not exist" in error:
                ret["NoReplicas"].append(repSEName)
            else:
                ret["NoMetadata"].append(repSEName)
        elif not noReplicas:
            repSEMetadata = repSEMetadata["Value"]["Successful"][opFile.LFN]
            seChecksum = repSEMetadata.get("Checksum")
            if not seChecksum and opFile.Checksum:
                # SE has no checksum: drop the request checksum so no comparison is made
                opFile.Checksum = None
                opFile.ChecksumType = None
            elif seChecksum and not opFile.Checksum:
                # Adopt the SE checksum when the request has none
                opFile.Checksum = seChecksum
            if not opFile.Checksum or not seChecksum or compareAdler(seChecksum, opFile.Checksum):
                # # All checksums are OK
                ret["Valid"].append(repSEName)
            else:
                log.warn(
                    " %s checksum mismatch, FC: '%s' @%s: '%s'" % (opFile.LFN, opFile.Checksum, repSEName, seChecksum)
                )
                ret["Bad"].append(repSEName)
        else:
            # If a replica was found somewhere, don't set the file as no replicas
            ret["NoReplicas"] = []
    return S_OK(ret)
def filterReplicas( opFile, logger = None, dataManager = None ):
  """ filter out banned/invalid source SEs

  Classify every replica of opFile.LFN into SE-name lists keyed by
  'Valid', 'NoMetadata', 'Bad', 'NoReplicas' and 'NoPFN'.  May update
  opFile.Checksum / opFile.ChecksumType / opFile.Status / opFile.Error
  in place.

  :param opFile: request-operation file object (read and mutated)
  :param logger: optional logger, defaults to gLogger
  :param dataManager: optional DataManager, a fresh one is created if None
  :return: S_OK( dict of lists ) on success, otherwise the failed result
           or S_ERROR when the catalog reports the file as non-existent
  """
  if logger is None:
    logger = gLogger
  if dataManager is None:
    dataManager = DataManager()
  log = logger.getSubLogger( "filterReplicas" )
  ret = { "Valid" : [], "NoMetadata" : [], "Bad" : [], 'NoReplicas':[], 'NoPFN':[] }
  replicas = dataManager.getActiveReplicas( opFile.LFN )
  if not replicas["OK"]:
    log.error( 'Failed to get active replicas', replicas["Message"] )
    return replicas
  reNotExists = re.compile( r".*such file.*" )
  replicas = replicas["Value"]
  failed = replicas["Failed"].get( opFile.LFN , "" )
  # A "no such file" failure from the catalog is terminal for this file
  if reNotExists.match( failed.lower() ):
    opFile.Status = "Failed"
    opFile.Error = failed
    return S_ERROR( failed )
  replicas = replicas["Successful"].get( opFile.LFN, {} )
  noReplicas = False
  if not replicas:
    # No active replica: fall back to all (possibly inactive) replicas
    allReplicas = dataManager.getReplicas( opFile.LFN )
    if allReplicas['OK']:
      allReplicas = allReplicas['Value']['Successful'].get( opFile.LFN, {} )
      if not allReplicas:
        ret['NoReplicas'].append( None )
        noReplicas = True
      else:
        # We try inactive replicas to see if maybe the file doesn't exist at all
        replicas = allReplicas
      log.warn( "File has no%s replica in File Catalog" % ( '' if noReplicas else ' active' ), opFile.LFN )
    else:
      return allReplicas
  # hexAdlerToInt() returns False for an invalid hex string.  Use identity
  # checks ("is False"), not equality: 0 == False is True in Python, so an
  # equality test would wrongly discard a legitimate Adler-32 value of 0.
  if not opFile.Checksum or hexAdlerToInt( opFile.Checksum ) is False:
    # Set Checksum to FC checksum if not set in the request
    fcMetadata = FileCatalog().getFileMetadata( opFile.LFN )
    fcChecksum = fcMetadata.get( 'Value', {} ).get( 'Successful', {} ).get( opFile.LFN, {} ).get( 'Checksum' )
    # Replace opFile.Checksum if it doesn't match a valid FC checksum
    if fcChecksum:
      if hexAdlerToInt( fcChecksum ) is not False:
        opFile.Checksum = fcChecksum
        opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get( 'ChecksumType', 'Adler32' )
      else:
        opFile.Checksum = None
  # Interrogate each SE holding a replica for its file metadata
  for repSEName in replicas:
    repSEMetadata = StorageElement( repSEName ).getFileMetadata( opFile.LFN )
    # Either an overall failure ('Message') or a per-LFN failure entry
    error = repSEMetadata.get( 'Message', repSEMetadata.get( 'Value', {} ).get( 'Failed', {} ).get( opFile.LFN ) )
    if error:
      log.warn( 'unable to get metadata at %s for %s' % ( repSEName, opFile.LFN ), error.replace( '\n', '' ) )
      if 'File does not exist' in error:
        ret['NoReplicas'].append( repSEName )
      else:
        ret["NoMetadata"].append( repSEName )
    elif not noReplicas:
      repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]
      seChecksum = hexAdlerToInt( repSEMetadata.get( "Checksum" ) )
      # Identity check again: 0 is a valid checksum, False means invalid
      if seChecksum is False and opFile.Checksum:
        ret['NoMetadata'].append( repSEName )
      elif not seChecksum and opFile.Checksum:
        opFile.Checksum = None
        opFile.ChecksumType = None
      elif seChecksum and ( not opFile.Checksum or opFile.Checksum == 'False' ):
        # Use the SE checksum and force type to be Adler32
        opFile.Checksum = seChecksum
        opFile.ChecksumType = 'Adler32'
      if not opFile.Checksum or not seChecksum or compareAdler( seChecksum, opFile.Checksum ):
        # # All checksums are OK
        ret["Valid"].append( repSEName )
      else:
        log.warn( " %s checksum mismatch, FC: '%s' @%s: '%s'" % ( opFile.LFN, opFile.Checksum, repSEName, seChecksum ) )
        ret["Bad"].append( repSEName )
    else:
      # If a replica was found somewhere, don't set the file as no replicas
      ret['NoReplicas'] = []
  return S_OK( ret )
class fakeClient: def __init__(self, trans, transID, lfns, asIfProd): self.trans = trans self.transID = transID from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient self.transClient = TransformationClient() from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient self.bk = BookkeepingClient() from DIRAC.DataManagementSystem.Client.DataManager import DataManager self.dm = DataManager() self.asIfProd = asIfProd (self.transFiles, self.transReplicas) = self.prepareForPlugin(lfns) def addFilesToTransformation(self, transID, lfns): return S_OK({ 'Failed': {}, 'Successful': dict([(lfn, 'Added') for lfn in lfns]) }) def getTransformation(self, transID, extraParams=False): if transID == self.transID and self.asIfProd: transID = self.asIfProd if transID != self.transID: return self.transClient.getTransformation(transID) res = self.trans.getType() return DIRAC.S_OK({'Type': res['Value']}) def getReplicas(self): return self.transReplicas def getFiles(self): return self.transFiles def getCounters(self, table, attrList, condDict): if condDict['TransformationID'] == self.transID and self.asIfProd: condDict['TransformationID'] = self.asIfProd if condDict['TransformationID'] != self.transID: return self.transClient.getCounters(table, attrList, condDict) possibleTargets = [ 'CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW', 'SARA-RAW', 'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW' ] counters = [] for se in possibleTargets: counters.append(({'UsedSE': se}, 0)) return DIRAC.S_OK(counters) def getBookkeepingQuery(self, transID): if transID == self.transID and self.asIfProd: return self.transClient.getBookkeepingQuery(asIfProd) return self.trans.getBkQuery() def insertTransformationRun(self, transID, runID, xx): return DIRAC.S_OK() def getTransformationRuns(self, condDict): if condDict['TransformationID'] == self.transID and self.asIfProd: condDict['TransformationID'] = self.asIfProd if condDict['TransformationID'] == self.transID: 
transRuns = [] runs = condDict.get('RunNumber', []) if not runs and self.transFiles: res = self.bk.getFileMetadata( [fileDict['LFN'] for fileDict in self.transFiles]) if not res['OK']: return res runs = list( set(meta['RunNumber'] for meta in res['Value']['Successful'].itervalues())) for run in runs: transRuns.append({ 'RunNumber': run, 'Status': "Active", "SelectedSite": None }) return DIRAC.S_OK(transRuns) else: return self.transClient.getTransformationRuns(condDict) def getTransformationFiles(self, condDict=None): if condDict.get('TransformationID') == self.transID and self.asIfProd: condDict['TransformationID'] = self.asIfProd if condDict.get('TransformationID') == self.transID: transFiles = [] if 'Status' in condDict and 'Unused' not in condDict['Status']: return DIRAC.S_OK(transFiles) runs = None if 'RunNumber' in condDict: runs = condDict['RunNumber'] if not isinstance(runs, list): runs = [runs] for fileDict in self.transFiles: if not runs or fileDict['RunNumber'] in runs: transFiles.append({ 'LFN': fileDict['LFN'], 'Status': 'Unused', 'RunNumber': fileDict['RunNumber'] }) return DIRAC.S_OK(transFiles) else: return self.transClient.getTransformationFiles(condDict=condDict) def setParameterToTransformationFiles(self, transID, lfnDict): """ Update the transFiles with some parameters """ if transID == self.transID: for fileDict in self.transFiles: fileDict.update(lfnDict.get(fileDict['LFN'], {})) return S_OK() else: return self.transClient.setParameterToTransformationFiles( transID, lfnDict) def getTransformationFilesCount(self, transID, field, selection=None): if selection is None: selection = {} if transID == self.transID or selection.get( 'TransformationID') == self.transID: runs = selection.get('RunNumber') if runs and not isinstance(runs, list): runs = [runs] if field == 'Status': counters = {'Unused': 0} for fileDict in self.transFiles: if not runs or fileDict['RunNumber'] in runs: counters['Unused'] += 1 elif field == 'RunNumber': counters = {} for 
fileDict in self.transFiles: runID = fileDict['RunNumber'] if not runs or runID in runs: counters.setdefault(runID, 0) counters[runID] += 1 else: return DIRAC.S_ERROR('Not implemented for field ' + field) counters['Total'] = sum(count for count in counters.itervalues()) return DIRAC.S_OK(counters) else: return self.transClient.getTransformationFilesCount( transID, field, selection=selection) def getTransformationRunStats(self, transIDs): counters = {} for transID in transIDs: if transID == self.transID: for fileDict in self.transFiles: runID = fileDict['RunNumber'] counters[transID][runID]['Unused'] = counters.setdefault( transID, {}).setdefault(runID, {}).setdefault( 'Unused', 0) + 1 for runID in counters[transID]: counters[transID][runID]['Total'] = counters[transID][ runID]['Unused'] else: res = self.transClient.getTransformationRunStats(transIDs) if res['OK']: counters.update(res['Value']) else: return res return DIRAC.S_OK(counters) def addRunsMetadata(self, runID, val): return self.transClient.addRunsMetadata(runID, val) def getRunsMetadata(self, runID): return self.transClient.getRunsMetadata(runID) def setTransformationRunStatus(self, transID, runID, status): return DIRAC.S_OK() def setTransformationRunsSite(self, transID, runID, site): return DIRAC.S_OK() def setFileStatusForTransformation(self, transID, status, lfns): return DIRAC.S_OK() def addTransformationRunFiles(self, transID, run, lfns): return DIRAC.S_OK() def setDestinationForRun(self, runID, site): return DIRAC.S_OK() def getDestinationForRun(self, runID): return self.transClient.getDestinationForRun(runID) def prepareForPlugin(self, lfns): import time print "Preparing the plugin input data (%d files)" % len(lfns) type = self.trans.getType()['Value'] if not lfns: return (None, None) res = self.bk.getFileMetadata(lfns) if res['OK']: files = [] for lfn, metadata in res['Value']['Successful'].iteritems(): runID = metadata.get('RunNumber', 0) runDict = {"RunNumber": runID, "LFN": lfn} 
files.append(runDict) else: print "Error getting BK metadata", res['Message'] return ([], {}) replicas = {} startTime = time.time() from DIRAC.Core.Utilities.List import breakListIntoChunks for lfnChunk in breakListIntoChunks(lfns, 200): # print lfnChunk if type.lower() in ("replication", "removal"): res = self.dm.getReplicas(lfnChunk, getUrl=False) else: res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False) # print res if res['OK']: for lfn, ses in res['Value']['Successful'].iteritems(): if ses: replicas[lfn] = sorted(ses) else: print "Error getting replicas of %d files:" % len( lfns), res['Message'] print "Obtained replicas of %d files in %.3f seconds" % ( len(lfns), time.time() - startTime) return (files, replicas)
def execute():
    """ Parse the options and execute the script

    Runs the configured BK query for a reconstruction production, collects
    the replicas of all returned files, and for each run without a
    destination picks the Tier1-RDST SE holding most of the run's files
    (falling back to CERN-RDST) and records that site as the run destination.
    Progress and errors are reported through gLogger / ProgressBar.
    """
    bkQuery = dmScript.getBKQuery()
    fileType = bkQuery.getFileTypeList()
    # Only reconstruction outputs make sense for RDST destination setting
    if not set(fileType) & {'FULL.DST', 'RDST', 'SDST'}:
        gLogger.error("Please provide a reconstruction BK path")
        DIRAC.exit(1)
    from LHCbDIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    from DIRAC.DataManagementSystem.Client.DataManager import DataManager
    from DIRAC.Core.Utilities.List import breakListIntoChunks
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    from DIRAC.DataManagementSystem.Utilities.DMSHelpers import DMSHelpers, resolveSEGroup
    bk = BookkeepingClient()
    tr = TransformationClient()
    dm = DataManager()
    dmsHelper = DMSHelpers()
    bkQueryDict = bkQuery.getQueryDict()
    gLogger.notice("For BK Query:", str(bkQueryDict))
    progressBar = ProgressBar(1, title="Running BK query...", step=1)
    res = bk.getFilesWithMetadata(bkQueryDict)
    if not res['OK']:
        gLogger.error("Error getting files from BK", res['Message'])
        DIRAC.exit(2)
    if 'ParameterNames' in res.get('Value', {}):
        parameterNames = res['Value']['ParameterNames']
        info = res['Value']['Records']
        progressBar.endLoop("Obtained %d files" % len(info))
    else:
        gLogger.error('\nNo metadata found')
        DIRAC.exit(3)
    # Build the LFN list and group LFNs by run number
    lfns = []
    runLFNs = {}
    for item in info:
        metadata = dict(zip(parameterNames, item))
        lfn = metadata['FileName']
        lfns.append(lfn)
        runLFNs.setdefault(metadata['RunNumber'], []).append(lfn)
    # Fetch replicas in chunks; collect per-message error buckets
    chunkSize = 1000
    progressBar = ProgressBar(len(lfns),
                              title='Getting replicas of %d files' % len(lfns),
                              chunk=chunkSize)
    replicas = {}
    errors = {}
    for lfnChunk in breakListIntoChunks(lfns, chunkSize):
        progressBar.loop()
        res = dm.getReplicas(lfnChunk, getUrl=False)
        if not res['OK']:
            errors.setdefault(res['Message'], []).extend(lfnChunk)
        else:
            replicas.update(res['Value']['Successful'])
            for lfn, error in res['Value']['Failed'].iteritems():
                errors.setdefault(error, []).append(lfn)
    progressBar.endLoop()
    for error, lfns in errors.iteritems():
        gLogger.error(error, 'for %d files' % len(lfns))
    tier1RDST = set(resolveSEGroup('Tier1-RDST'))
    setOK = 0
    errors = {}
    progressBar = ProgressBar(len(runLFNs),
                              title='Defining destination for %d runs' % len(runLFNs),
                              step=10)
    for run, lfns in runLFNs.iteritems():
        progressBar.loop()
        res = tr.getDestinationForRun(run)
        # Never override an existing destination
        if res.get('Value'):
            errors.setdefault('Destination already set', []).append(str(run))
            continue
        # Count how many of the run's files each Tier1-RDST SE holds
        seCounts = {}
        for lfn in lfns:
            for se in tier1RDST.intersection(replicas.get(lfn, [])):
                seCounts[se] = seCounts.setdefault(se, 0) + 1
        # Pick the SE with the highest file count
        maxi = 0
        seMax = None
        for se, count in seCounts.iteritems():
            if count > maxi:
                seMax = se
                maxi = count
        if not seMax:
            errors.setdefault('No SE found, use CERN-RDST', []).append(str(run))
            seMax = 'CERN-RDST'
        # SE found, get its site
        res = dmsHelper.getLocalSiteForSE(seMax)
        if res['OK']:
            site = res['Value']
            res = tr.setDestinationForRun(run, site)
            if not res['OK']:
                errors.setdefault(res['Message'], []).append(str(run))
            else:
                setOK += 1
    progressBar.endLoop('Successfully set destination for %d runs' % setOK)
    for error, runs in errors.iteritems():
        gLogger.error(error, 'for runs %s' % ','.join(runs))