# Module-level imports required by the classes below (standard DIRAC locations)
import os
import re
import sys
import time
import errno

import six

import DIRAC
from DIRAC import gLogger, S_OK, S_ERROR
from DIRAC.Core.Utilities.Adler import compareAdler
from DIRAC.Core.Utilities.List import breakListIntoChunks
from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
from DIRAC.Resources.Storage.StorageElement import StorageElement
from DIRAC.DataManagementSystem.Client.DataIntegrityClient import DataIntegrityClient
from DIRAC.DataManagementSystem.Client.DataManager import DataManager
from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient


class ConsistencyInspector(object):
    """A class for handling some consistency checks"""

    def __init__(self, interactive=True, transClient=None, dm=None, fc=None, dic=None):
        """c'tor

        interactive: True for Data Manager use, False for a DIRAC Agent
        transClient: TransformationClient() if None, else the supplied transClient
        dm: DataManager() if None, else the supplied dm
        fc: FileCatalog() if None, else the supplied fc

        One object for every production/directoriesList...
        """
        self.interactive = interactive
        self.transClient = TransformationClient() if transClient is None else transClient
        self.dataManager = dm if dm else DataManager()
        self.fileCatalog = fc if fc else FileCatalog()
        self.dic = dic if dic else DataIntegrityClient()
        self.dirac = Dirac()

        # Base elements from which to start the consistency checks
        self._prod = 0
        self._bkQuery = None
        self._fileType = []
        self._fileTypesExcluded = []
        self._lfns = []
        self.noLFC = False
        self.directories = []

        # Accessory elements
        self.runsList = []
        self.runStatus = None
        self.fromProd = None
        self.transType = ""
        self.cachedReplicas = {}

        self.prcdWithDesc = []
        self.prcdWithoutDesc = []
        self.prcdWithMultDesc = []
        self.nonPrcdWithDesc = []
        self.nonPrcdWithoutDesc = []
        self.nonPrcdWithMultDesc = []
        self.descForPrcdLFNs = []
        self.descForNonPrcdLFNs = []
        self.removedFiles = []

        self.absentLFNsInFC = []
        self.existLFNsNoSE = {}
        self.existLFNsBadReplicas = {}
        self.existLFNsBadFiles = {}
        self.existLFNsNotExisting = {}
        self.commonAncestors = {}
        self.multipleDescendants = {}
        self.ancestors = {}

        self._verbose = False

    def __logVerbose(self, msg, msg1=""):
        """logger helper for verbose information"""
        if self._verbose:
            newMsg = ("[ConsistencyChecks] [%s] " % self.prod) if self.prod else ""
            # Add that prefix to all lines of the message
            newMsg1 = msg1.replace("\n", "\n" + newMsg)
            newMsg += msg.replace("\n", "\n" + newMsg)
            gLogger.notice(newMsg, newMsg1)
        else:
            gLogger.verbose(msg, msg1)

    ##########################################################################

    def checkFC2SE(self):
        """check files vs SE information"""
        # compareChecksum() returns an S_OK/S_ERROR structure, so unwrap it
        res = self.compareChecksum(self.lfns)
        if not res["OK"]:
            return res
        repDict = res["Value"]
        self.existLFNsNoSE = repDict["MissingReplica"]
        self.existLFNsNotExisting = repDict["MissingAllReplicas"]
        self.existLFNsBadReplicas = repDict["SomeReplicasCorrupted"]
        self.existLFNsBadFiles = repDict["AllReplicasCorrupted"]
        return S_OK()

    def getReplicasPresence(self, lfns):
        """get the replicas using the standard FileCatalog.getReplicas()"""
        present = set()
        notPresent = set()

        chunkSize = 100
        printProgress = len(lfns) > chunkSize
        startTime = time.time()
        self.__write(
            "Checking replicas for %d files%s"
            % (len(lfns), (" (chunks of %d)" % chunkSize) if printProgress else "... ")
        )
        for chunk in breakListIntoChunks(lfns, chunkSize):
            if printProgress:
                self.__write(".")
            # Retry each chunk up to 9 times; a chunk that fails every attempt
            # ends up in neither the present nor the notPresent list
            for _ in range(1, 10):
                res = self.fileCatalog.getReplicas(chunk)
                if res["OK"]:
                    present.update(res["Value"]["Successful"])
                    self.cachedReplicas.update(res["Value"]["Successful"])
                    notPresent.update(res["Value"]["Failed"])
                    break
                time.sleep(0.1)
        self.__write(" (%.1f seconds)\n" % (time.time() - startTime))

        if notPresent:
            self.__logVerbose("Files without replicas:", "\n".join([""] + sorted(notPresent)))
        return list(present), list(notPresent)

    ##########################################################################

    def getReplicasPresenceFromDirectoryScan(self, lfns):
        """Get replicas scanning the directories. Might be faster."""
        dirs = {}
        present = []
        notPresent = []
        compare = True

        for lfn in lfns:
            dirN = os.path.dirname(lfn)
            if lfn == dirN + "/":
                compare = False
            dirs.setdefault(dirN, []).append(lfn)

        if compare:
            self.__write("Checking File Catalog for %d files from %d directories " % (len(lfns), len(dirs)))
        else:
            self.__write("Getting files from %d directories " % len(dirs))
        startTime = time.time()

        for dirN in sorted(dirs):
            startTime1 = time.time()
            self.__write(".")
            lfnsFound = self._getFilesFromDirectoryScan(dirN)
            gLogger.verbose("Obtained %d files in %.1f seconds" % (len(lfnsFound), time.time() - startTime1))
            if compare:
                pr, notPr = self.__compareLFNLists(dirs[dirN], lfnsFound)
                notPresent += notPr
                present += pr
            else:
                present += lfnsFound

        self.__write(" (%.1f seconds)\n" % (time.time() - startTime))
        gLogger.info("Found %d files with replicas and %d without" % (len(present), len(notPresent)))
        return present, notPresent

    ##########################################################################

    def __compareLFNLists(self, lfns, lfnsFound):
        """return files in both lists and files in lfns and not in lfnsFound"""
        present = []
        notPresent = lfns
        startTime = time.time()
        self.__logVerbose("Comparing list of %d LFNs with second list of %d" % (len(lfns), len(lfnsFound)))
        if lfnsFound:
            setLfns = set(lfns)
            setLfnsFound = set(lfnsFound)
            present = list(setLfns & setLfnsFound)
            notPresent = list(setLfns - setLfnsFound)
        self.__logVerbose("End of comparison: %.1f seconds" % (time.time() - startTime))
        return present, notPresent

    def _getFilesFromDirectoryScan(self, dirs):
        """calls dm.getFilesFromDirectory"""
        level = gLogger.getLevel()
        gLogger.setLevel("FATAL")
        res = self.dataManager.getFilesFromDirectory(dirs)
        gLogger.setLevel(level)
        if not res["OK"]:
            if "No such file or directory" not in res["Message"]:
                gLogger.error("Error getting files from directories %s:" % dirs, res["Message"])
            return []
        return res["Value"] if res["Value"] else []

    ##########################################################################

    def _getTSFiles(self):
        """Helper function - get files from the TS"""
        selectDict = {"TransformationID": self.prod}
        if self._lfns:
            selectDict["LFN"] = self._lfns
        elif self.runStatus and self.fromProd:
            res = self.transClient.getTransformationRuns(
                {"TransformationID": self.fromProd, "Status": self.runStatus}
            )
            if not res["OK"]:
                gLogger.error("Failed to get runs for transformation %d" % self.prod)
            else:
                if res["Value"]:
                    self.runsList.extend(
                        run["RunNumber"] for run in res["Value"] if run["RunNumber"] not in self.runsList
                    )
                    gLogger.notice("%d runs selected" % len(res["Value"]))
                elif not self.runsList:
                    gLogger.notice("No runs selected, check completed")
                    DIRAC.exit(0)
        if not self._lfns and self.runsList:
            selectDict["RunNumber"] = self.runsList

        res = self.transClient.getTransformation(self.prod)
        if not res["OK"]:
            gLogger.error("Failed to find transformation %s" % self.prod)
            return [], [], []
        status = res["Value"]["Status"]
        if status not in ("Active", "Stopped", "Completed", "Idle"):
            gLogger.notice(
                "Transformation %s in status %s, will not check if files are processed" % (self.prod, status)
            )

        processedLFNs = []
        nonProcessedLFNs = []
        nonProcessedStatuses = []
        if self._lfns:
            processedLFNs = self._lfns
        else:
            res = self.transClient.getTransformationFiles(selectDict)
            if not res["OK"]:
                gLogger.error("Failed to get files for transformation %d" % self.prod, res["Message"])
                return [], [], []
            processedLFNs = [item["LFN"] for item in res["Value"] if item["Status"] == "Processed"]
            nonProcessedLFNs = [item["LFN"] for item in res["Value"] if item["Status"] != "Processed"]
            nonProcessedStatuses = list(set(item["Status"] for item in res["Value"] if item["Status"] != "Processed"))

        return processedLFNs, nonProcessedLFNs, nonProcessedStatuses

    def __getDirectories(self):
        """get the directories where to look into (they are either given, or taken from the transformation ID)"""
        if self.directories:
            directories = []
            printout = False
            for directory in self.directories:
                if not directory.endswith("..."):
                    directories.append(directory)
                else:
                    printout = True
                    topDir = os.path.dirname(directory)
                    res = self.fileCatalog.listDirectory(topDir)
                    if not res["OK"]:
                        return S_ERROR(errno.ENOENT, res["Message"])
                    matchDir = directory.split("...")[0]
                    directories += [
                        d
                        for d in res["Value"]["Successful"].get(topDir, {}).get("SubDirs", [])
                        if d.startswith(matchDir)
                    ]
            if printout:
                gLogger.always("Expanded list of %d directories:\n%s" % (len(directories), "\n".join(directories)))
            return directories
        return S_ERROR(errno.ENOENT, "Need to specify the directories")

    ##########################################################################

    def __write(self, text):
        """Print progress to stdout when running interactively"""
        if self.interactive:
            sys.stdout.write(text)
            sys.stdout.flush()

    ##########################################################################

    def _selectByFileType(self, lfnDict, fileTypes=None, fileTypesExcluded=None):
        """Select only those files from the values of lfnDict that have a certain type"""
        if not lfnDict:
            return {}
        if not fileTypes:
            fileTypes = self.fileType
        if not fileTypesExcluded:
            fileTypesExcluded = self.fileTypesExcluded
        else:
            fileTypesExcluded += [ft for ft in self.fileTypesExcluded if ft not in fileTypesExcluded]
        # lfnDict is a dictionary of dictionaries including the metadata.
        # Note this is a *shallow* copy of the outer dictionary: the inner
        # per-ancestor dictionaries are shared with (and pruned in) lfnDict
        ancDict = dict(lfnDict)
        if fileTypes == [""]:
            fileTypes = []
        # and loop on the original dictionaries
        for ancestor in lfnDict:
            for desc in list(lfnDict[ancestor]):
                ft = lfnDict[ancestor][desc]["FileType"]
                if ft in fileTypesExcluded or (fileTypes and ft not in fileTypes):
                    ancDict[ancestor].pop(desc)
            if not len(ancDict[ancestor]):
                ancDict.pop(ancestor)
        return ancDict

    @staticmethod
    def _getFileTypesCount(lfnDict):
        """return file types count"""
        ft_dict = {}
        for ancestor in lfnDict:
            t_dict = {}
            for desc in lfnDict[ancestor]:
                ft = lfnDict[ancestor][desc]["FileType"]
                t_dict[ft] = t_dict.setdefault(ft, 0) + 1
            ft_dict[ancestor] = t_dict
        return ft_dict

    def __getLFNsFromFC(self):
        """Check if a list of LFNs is in the FC or not"""
        if not self.lfns:
            directories = []
            for dirName in self.__getDirectories():
                if not dirName.endswith("/"):
                    dirName += "/"
                directories.append(dirName)
            present, notPresent = self.getReplicasPresenceFromDirectoryScan(directories)
        else:
            present, notPresent = self.getReplicasPresence(self.lfns)
        return present, notPresent

    def compareChecksum(self, lfns):
        """Compare the checksum of the file in the FC with the checksum of the physical replicas.

        Returns an S_OK with a dictionary containing 5 sub-dictionaries:
        'MissingAllReplicas' (all replicas missing from storage),
        'MissingReplica' (some replicas missing, at least one present),
        'AllReplicasCorrupted' (all replicas have a bad checksum),
        'SomeReplicasCorrupted' (some replicas corrupted, at least one good),
        'NoReplicas' (no replicas registered in the FC).
        """
        retDict = {
            "AllReplicasCorrupted": {},
            "SomeReplicasCorrupted": {},
            "MissingReplica": {},
            "MissingAllReplicas": {},
            "NoReplicas": {},
        }

        chunkSize = 100
        replicas = {}
        setLfns = set(lfns)
        cachedLfns = setLfns & set(self.cachedReplicas)
        for lfn in cachedLfns:
            replicas[lfn] = self.cachedReplicas[lfn]
        lfnsLeft = list(setLfns - cachedLfns)
        if lfnsLeft:
            self.__write("Get replicas for %d files (chunks of %d): " % (len(lfnsLeft), chunkSize))
            for lfnChunk in breakListIntoChunks(lfnsLeft, chunkSize):
                self.__write(".")
                replicasRes = self.fileCatalog.getReplicas(lfnChunk)
                if not replicasRes["OK"]:
                    gLogger.error("error: %s" % replicasRes["Message"])
                    return S_ERROR(errno.ENOENT, "error: %s" % replicasRes["Message"])
                replicasRes = replicasRes["Value"]
                if replicasRes["Failed"]:
                    retDict["NoReplicas"].update(replicasRes["Failed"])
                replicas.update(replicasRes["Successful"])

        self.__write("Get FC metadata for %d files to be checked: " % len(lfns))
        metadata = {}
        for lfnChunk in breakListIntoChunks(replicas, chunkSize):
            self.__write(".")
            res = self.fileCatalog.getFileMetadata(lfnChunk)
            if not res["OK"]:
                return S_ERROR(errno.ENOENT, "error %s" % res["Message"])
            metadata.update(res["Value"]["Successful"])

        gLogger.notice("Check existence and compare checksum file by file...")
        csDict = {}
        seFiles = {}
        # Reverse the LFN->SE dictionary
        nReps = 0
        for lfn in replicas:
            csDict.setdefault(lfn, {})["FCChecksum"] = metadata.get(lfn, {}).get("Checksum")
            for se in replicas[lfn]:
                seFiles.setdefault(se, []).append(lfn)
                nReps += 1

        gLogger.notice("Getting checksum of %d replicas in %d SEs" % (nReps, len(seFiles)))
        checkSum = {}
        lfnNotExisting = {}
        lfnNoInfo = {}
        logLevel = gLogger.getLevel()
        gLogger.setLevel("FATAL")
        for num, se in enumerate(sorted(seFiles)):
            self.__write("\n%d. At %s (%d files): " % (num, se, len(seFiles[se])))
            oSe = StorageElement(se)
            notFound = 0
            for surlChunk in breakListIntoChunks(seFiles[se], chunkSize):
                self.__write(".")
                metadata = oSe.getFileMetadata(surlChunk)
                if not metadata["OK"]:
                    gLogger.error("Error: getFileMetadata returns %s. Ignore those replicas" % metadata["Message"])
                    # Remove from list of replicas as we don't know whether it is OK or not
                    for lfn in seFiles[se]:
                        lfnNoInfo.setdefault(lfn, []).append(se)
                else:
                    metadata = metadata["Value"]
                    notFound += len(metadata["Failed"])
                    for lfn in metadata["Failed"]:
                        lfnNotExisting.setdefault(lfn, []).append(se)
                    for lfn in metadata["Successful"]:
                        checkSum.setdefault(lfn, {})[se] = metadata["Successful"][lfn]["Checksum"]
            if notFound:
                gLogger.error("%d files not found" % notFound)
        gLogger.setLevel(logLevel)

        gLogger.notice("Verifying checksum of %d files" % len(replicas))
        for lfn in replicas:
            # get the lfn checksum from the FC
            replicaDict = replicas[lfn]
            oneGoodReplica = False
            allGoodReplicas = True
            fcChecksum = csDict[lfn].pop("FCChecksum")
            for se in replicaDict:
                # If the replica doesn't exist, skip the check
                if se in lfnNotExisting.get(lfn, []):
                    allGoodReplicas = False
                    continue
                if se in lfnNoInfo.get(lfn, []):
                    # If there is no info, a priori it could be good
                    oneGoodReplica = True
                    continue
                # get the SE metadata and compare the checksum
                surlChecksum = checkSum.get(lfn, {}).get(se, "")
                if not surlChecksum or not compareAdler(fcChecksum, surlChecksum):
                    # fcChecksum does not match surlChecksum
                    csDict[lfn][se] = {"PFNChecksum": surlChecksum}
                    gLogger.info(
                        "ERROR!! checksum mismatch at %s for LFN %s: FC checksum: %s , PFN checksum : %s"
                        % (se, lfn, fcChecksum, surlChecksum)
                    )
                    allGoodReplicas = False
                else:
                    oneGoodReplica = True
            if not oneGoodReplica:
                if lfn in lfnNotExisting:
                    gLogger.info("=> All replicas are missing", lfn)
                    retDict["MissingAllReplicas"][lfn] = "All"
                else:
                    gLogger.info("=> All replicas have bad checksum", lfn)
                    retDict["AllReplicasCorrupted"][lfn] = csDict[lfn]
            elif not allGoodReplicas:
                if lfn in lfnNotExisting:
                    gLogger.info("=> At least one replica missing", lfn)
                    retDict["MissingReplica"][lfn] = lfnNotExisting[lfn]
                else:
                    gLogger.info("=> At least one replica with good Checksum", lfn)
                    retDict["SomeReplicasCorrupted"][lfn] = csDict[lfn]

        return S_OK(retDict)

    ##########################################################################
    # properties

    def set_prod(self, value):
        """Setter"""
        if value:
            value = int(value)
            res = self.transClient.getTransformation(value, extraParams=False)
            if not res["OK"]:
                raise Exception("Couldn't find transformation %d: %s" % (value, res["Message"]))
            self.transType = res["Value"]["Type"]
            if self.interactive:
                gLogger.info("Production %d has type %s" % (value, self.transType))
        else:
            value = 0
        self._prod = value

    def get_prod(self):
        """Getter"""
        return self._prod

    prod = property(get_prod, set_prod)

    def set_fileType(self, value):
        """Setter"""
        self._fileType = [ft.upper() for ft in value]

    def get_fileType(self):
        """Getter"""
        return self._fileType

    fileType = property(get_fileType, set_fileType)

    def set_fileTypesExcluded(self, value):
        """Setter"""
        self._fileTypesExcluded = [ft.upper() for ft in value]

    def get_fileTypesExcluded(self):
        """Getter"""
        return self._fileTypesExcluded

    fileTypesExcluded = property(get_fileTypesExcluded, set_fileTypesExcluded)

    def set_lfns(self, value):
        """Setter"""
        if isinstance(value, six.string_types):
            value = [value]
        value = [v.replace(" ", "").replace("//", "/") for v in value]
        self._lfns = value

    def get_lfns(self):
        """Getter"""
        return self._lfns

    lfns = property(get_lfns, set_lfns)

    ##########################################################################
    #
    # This part was backported from DataIntegrityClient
    #
    #
    # This section contains the specific methods for File Catalog->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """This obtains the replica and metadata information from the catalog
        for the supplied directory and checks against the storage elements.
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->SE check")
        gLogger.info("-" * 40)
        if isinstance(lfnDir, six.string_types):
            lfnDir = [lfnDir]
        res = self._getCatalogDirectoryContents(lfnDir)
        if not res["OK"]:
            return res
        replicas = res["Value"]["Replicas"]
        catalogMetadata = res["Value"]["Metadata"]
        res = self.checkPhysicalFiles(replicas, catalogMetadata)
        if not res["OK"]:
            return res
        resDict = {"CatalogMetadata": catalogMetadata, "CatalogReplicas": replicas}
        return S_OK(resDict)

    def catalogFileToSE(self, lfns):
        """This obtains the replica and metadata information from the catalog
        and checks against the storage elements.
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->SE check")
        gLogger.info("-" * 40)
        if isinstance(lfns, six.string_types):
            lfns = [lfns]
        res = self._getCatalogMetadata(lfns)
        if not res["OK"]:
            return res
        catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res["Value"]
        res = self._getCatalogReplicas(list(catalogMetadata))
        if not res["OK"]:
            return res
        replicas, _zeroReplicaFiles = res["Value"]
        res = self.checkPhysicalFiles(replicas, catalogMetadata)
        if not res["OK"]:
            return res
        resDict = {"CatalogMetadata": catalogMetadata, "CatalogReplicas": replicas}
        return S_OK(resDict)

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
        """This method takes the supplied replica and metadata information
        obtained from the catalog and checks against the storage elements.
        """
        # FIXME: we better use the compareChecksum function instead of this one!
        # or maybe directly checkFC2SE
        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->SE check")
        gLogger.info("-" * 40)
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, _url in replicaDict.items():
                if ses and se not in ses:
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info("%s %s" % ("Storage Element".ljust(20), "Replicas".rjust(20)))

        for se in sorted(seLfns):
            files = len(seLfns[se])
            gLogger.info("%s %s" % (se.ljust(20), str(files).rjust(20)))

            lfns = seLfns[se]
            sizeMismatch = []
            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res["OK"]:
                gLogger.error("Failed to get physical file metadata.", res["Message"])
                return res
            for lfn, metadata in res["Value"].items():
                if lfn in catalogMetadata:  # and ( metadata['Size'] != 0 ):
                    if metadata["Size"] != catalogMetadata[lfn]["Size"]:
                        sizeMismatch.append((lfn, "deprecatedUrl", se, "CatalogPFNSizeMismatch"))
            if sizeMismatch:
                self.dic.reportProblematicReplicas(sizeMismatch, se, "CatalogPFNSizeMismatch")
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """Obtain the physical file metadata and check the files are available"""
        gLogger.info("Checking the integrity of %s physical files at %s" % (len(lfns), se))
        res = StorageElement(se).getFileMetadata(lfns)
        if not res["OK"]:
            gLogger.error("Failed to get metadata for lfns.", res["Message"])
            return res
        pfnMetadata = res["Value"]["Successful"]
        # If the replicas are completely missing
        missingReplicas = []
        for lfn, reason in res["Value"]["Failed"].items():
            if re.search("File does not exist", reason):
                missingReplicas.append((lfn, "deprecatedUrl", se, "PFNMissing"))
        if missingReplicas:
            self.dic.reportProblematicReplicas(missingReplicas, se, "PFNMissing")
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, metadata in pfnMetadata.items():
            if metadata.get("Lost"):
                lostReplicas.append((lfn, "deprecatedUrl", se, "PFNLost"))
            if metadata.get("Unavailable") or not metadata["Accessible"]:
                unavailableReplicas.append((lfn, "deprecatedUrl", se, "PFNUnavailable"))
            if not metadata["Size"]:
                zeroSizeReplicas.append((lfn, "deprecatedUrl", se, "PFNZeroSize"))
        if lostReplicas:
            self.dic.reportProblematicReplicas(lostReplicas, se, "PFNLost")
        if unavailableReplicas:
            self.dic.reportProblematicReplicas(unavailableReplicas, se, "PFNUnavailable")
        if zeroSizeReplicas:
            self.dic.reportProblematicReplicas(zeroSizeReplicas, se, "PFNZeroSize")
        gLogger.info("Checking the integrity of physical files at %s complete" % se)
        return S_OK(pfnMetadata)
    ##########################################################################
    #
    # This section contains the specific methods for SE->File Catalog checks
    #

    def storageDirectoryToCatalog(self, lfnDir, storageElement):
        """This obtains the files found on the storage element in the supplied
        directories and determines whether they exist in the catalog and checks
        their metadata elements.
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the SE->FC check at %s" % storageElement)
        gLogger.info("-" * 40)
        if isinstance(lfnDir, six.string_types):
            lfnDir = [lfnDir]
        res = self.getStorageDirectoryContents(lfnDir, storageElement)
        if not res["OK"]:
            return res
        storageFileMetadata = res["Value"]
        if storageFileMetadata:
            return self.__checkCatalogForSEFiles(storageFileMetadata, storageElement)
        return S_OK({"CatalogMetadata": {}, "StorageMetadata": {}})

    def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
        gLogger.info("Checking %s storage files exist in the catalog" % len(storageMetadata))
        res = self.fileCatalog.getReplicas(storageMetadata)
        if not res["OK"]:
            gLogger.error("Failed to get replicas for LFN", res["Message"])
            return res
        failedLfns = res["Value"]["Failed"]
        successfulLfns = res["Value"]["Successful"]
        notRegisteredLfns = []
        for lfn in storageMetadata:
            if lfn in failedLfns:
                if "No such file or directory" in failedLfns[lfn]:
                    notRegisteredLfns.append((lfn, "deprecatedUrl", storageElement, "LFNNotRegistered"))
                    failedLfns.pop(lfn)
            elif storageElement not in successfulLfns[lfn]:
                notRegisteredLfns.append((lfn, "deprecatedUrl", storageElement, "LFNNotRegistered"))
        if notRegisteredLfns:
            self.dic.reportProblematicReplicas(notRegisteredLfns, storageElement, "LFNNotRegistered")
        if failedLfns:
            return S_ERROR(errno.ENOENT, "Failed to obtain replicas")

        # For the LFNs found to be registered obtain the file metadata from the
        # catalog and verify against the storage metadata
        res = self._getCatalogMetadata(storageMetadata)
        if not res["OK"]:
            return res
        catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res["Value"]
        sizeMismatch = []
        for lfn, lfnCatalogMetadata in catalogMetadata.items():
            lfnStorageMetadata = storageMetadata[lfn]
            if (lfnStorageMetadata["Size"] != lfnCatalogMetadata["Size"]) and (lfnStorageMetadata["Size"] != 0):
                sizeMismatch.append((lfn, "deprecatedUrl", storageElement, "CatalogPFNSizeMismatch"))
        if sizeMismatch:
            self.dic.reportProblematicReplicas(sizeMismatch, storageElement, "CatalogPFNSizeMismatch")
        gLogger.info("Checking storage files exist in the catalog complete")
        resDict = {"CatalogMetadata": catalogMetadata, "StorageMetadata": storageMetadata}
        return S_OK(resDict)

    def getStorageDirectoryContents(self, lfnDir, storageElement):
        """This takes the supplied lfn directories and recursively obtains the
        files in the supplied storage element.
        """
        gLogger.info("Obtaining the contents for %s directories at %s" % (len(lfnDir), storageElement))
        se = StorageElement(storageElement)

        res = se.exists(lfnDir)
        if not res["OK"]:
            gLogger.error("Failed to obtain existence of directories", res["Message"])
            return res
        for directory, error in res["Value"]["Failed"].items():
            gLogger.error("Failed to determine existence of directory", "%s %s" % (directory, error))
        if res["Value"]["Failed"]:
            return S_ERROR(errno.ENOENT, "Failed to determine existence of directory")
        directoryExists = res["Value"]["Successful"]
        activeDirs = []
        for directory in sorted(directoryExists):
            exists = directoryExists[directory]
            if exists:
                activeDirs.append(directory)

        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = se.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res["OK"]:
                gLogger.error("Failed to get directory contents", res["Message"])
                return res
            elif currentDir in res["Value"]["Failed"]:
                gLogger.error(
                    "Failed to get directory contents", "%s %s" % (currentDir, res["Value"]["Failed"][currentDir])
                )
                return S_ERROR(errno.ENOENT, res["Value"]["Failed"][currentDir])
            else:
                dirContents = res["Value"]["Successful"][currentDir]
                activeDirs.extend(se.getLFNFromURL(dirContents["SubDirs"]).get("Value", {}).get("Successful", []))
                fileURLMetadata = dirContents["Files"]
                fileMetadata = {}
                res = se.getLFNFromURL(fileURLMetadata)
                if not res["OK"]:
                    gLogger.error("Failed to get directory content LFNs", res["Message"])
                    return res
                for url, error in res["Value"]["Failed"].items():
                    gLogger.error("Failed to get LFN for URL", "%s %s" % (url, error))
                if res["Value"]["Failed"]:
                    return S_ERROR(errno.ENOENT, "Failed to get LFNs for PFNs")
                urlLfns = res["Value"]["Successful"]
                for urlLfn, lfn in urlLfns.items():
                    fileMetadata[lfn] = fileURLMetadata[urlLfn]
                allFiles.update(fileMetadata)

        zeroSizeFiles = []
        for lfn in sorted(allFiles):
            if os.path.basename(lfn) == "dirac_directory":
                allFiles.pop(lfn)
            else:
                metadata = allFiles[lfn]
                if not metadata["Size"]:
                    zeroSizeFiles.append((lfn, "deprecatedUrl", storageElement, "PFNZeroSize"))
        if zeroSizeFiles:
            self.dic.reportProblematicReplicas(zeroSizeFiles, storageElement, "PFNZeroSize")

        gLogger.info("Obtained a total of %s files for directories at %s" % (len(allFiles), storageElement))
        return S_OK(allFiles)

    def _getCatalogDirectoryContents(self, lfnDirs):
        """Obtain the contents of the supplied directory, recursively"""

        def _getDirectoryContent(directory):
            """Inner function: recursively scan a directory, returns list of LFNs"""
            filesInDirectory = {}

            gLogger.debug("Examining %s" % directory)
            res = self.fileCatalog.listDirectory(directory)
            if not res["OK"]:
                gLogger.error("Failed to get directory contents", res["Message"])
                return res
            if directory in res["Value"]["Failed"]:
                gLogger.error(
                    "Failed to get directory content", "%s %s" % (directory, res["Value"]["Failed"][directory])
                )
                return S_ERROR("Failed to get directory content")
            if directory not in res["Value"]["Successful"]:
                return S_ERROR("Directory not existing?")

            # first, adding the files found in the current directory
            gLogger.debug("Files in %s: %d" % (directory, len(res["Value"]["Successful"][directory]["Files"])))
            filesInDirectory.update(res["Value"]["Successful"][directory]["Files"])

            # then, looking for subDirectories content
            if res["Value"]["Successful"][directory]["SubDirs"]:
                for l_dir in res["Value"]["Successful"][directory]["SubDirs"]:
                    # recursion here
                    subDirContent = _getDirectoryContent(l_dir)
                    if not subDirContent["OK"]:
                        return subDirContent
                    filesInDirectory.update(subDirContent["Value"])

            return S_OK(filesInDirectory)

        gLogger.info("Obtaining the catalog contents for %d directories" % len(lfnDirs))

        allFiles = {}
        for lfnDir in lfnDirs:
            dirContent = _getDirectoryContent(lfnDir)
            if not dirContent["OK"]:
                return dirContent
            gLogger.debug("Content of directory %s: %d files" % (lfnDir, len(dirContent["Value"])))
            allFiles.update(dirContent["Value"])

        gLogger.debug("Content of directories examined: %d files" % len(allFiles))

        replicas = self.fileCatalog.getReplicas(list(allFiles))
        if not replicas["OK"]:
            return replicas
        if replicas["Value"]["Failed"]:
            return S_ERROR("Failures in replicas discovery")

        return S_OK({"Metadata": allFiles, "Replicas": replicas["Value"]["Successful"]})

    def _getCatalogReplicas(self, lfns):
        """Obtain the file replicas from the catalog while checking that there are replicas"""
        if not lfns:
            return S_OK(([], []))
        gLogger.info("Obtaining the replicas for %s files" % len(lfns))
        zeroReplicaFiles = []
        res = self.fileCatalog.getReplicas(lfns, allStatus=True)
        if not res["OK"]:
            gLogger.error("Failed to get catalog replicas", res["Message"])
            return res
        allReplicas = res["Value"]["Successful"]
        for lfn, error in res["Value"]["Failed"].items():
            if re.search("File has zero replicas", error):
                zeroReplicaFiles.append(lfn)
        gLogger.info("Obtaining the replicas for files complete")
        return S_OK((allReplicas, zeroReplicaFiles))

    def _getCatalogMetadata(self, lfns):
        """Obtain the file metadata from the catalog while checking they exist"""
        allMetadata = []
        missingCatalogFiles = []
        zeroSizeFiles = []
        if not lfns:
            return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))
        gLogger.info("Obtaining the catalog metadata for %s files" % len(lfns))
        res = self.fileCatalog.getFileMetadata(lfns)
        if not res["OK"]:
            gLogger.error("Failed to get catalog metadata", res["Message"])
            return res
        allMetadata = res["Value"]["Successful"]
        for lfn, error in res["Value"]["Failed"].items():
            if re.search("No such file or directory", error):
                missingCatalogFiles.append(lfn)
        gLogger.info("Obtaining the catalog metadata complete")
        return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))
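
# Illustrative sketch (not part of the original class): how a Data Manager
# might drive the two directions of check from a script. It assumes a
# configured DIRAC client environment (proxy, configuration service); the
# LFNs, directory and SE name below are hypothetical placeholders.
def _exampleUsage():
    """Minimal usage sketch for ConsistencyInspector."""
    ci = ConsistencyInspector(interactive=True)

    # FC -> SE direction: compare catalog checksums with the physical replicas
    ci.lfns = ["/lhcb/user/s/someuser/example.file"]  # hypothetical LFN
    ci.checkFC2SE()
    gLogger.notice("Files with all replicas missing: %s" % list(ci.existLFNsNotExisting))
    gLogger.notice("Files with all replicas corrupted: %s" % list(ci.existLFNsBadFiles))

    # SE -> FC direction: scan a storage element and compare with the catalog
    res = ci.storageDirectoryToCatalog("/lhcb/user/s/someuser/", "CERN-USER")  # hypothetical SE
    if res["OK"]:
        gLogger.notice("Storage files checked: %d" % len(res["Value"]["StorageMetadata"]))
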
class fakeClient(object):
    """A fake TransformationClient, serving files and runs from locally
    prepared lists so that transformation plugins can be exercised without
    touching the real Transformation System."""

    def __init__(self, trans, transID, lfns, asIfProd):
        self.trans = trans
        self.transID = transID
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient

        self.transClient = TransformationClient()
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient

        self.bk = BookkeepingClient()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager

        self.dm = DataManager()
        self.asIfProd = asIfProd

        (self.transFiles, self.transReplicas) = self.prepareForPlugin(lfns)

    def addFilesToTransformation(self, transID, lfns):
        return S_OK({"Failed": {}, "Successful": dict((lfn, "Added") for lfn in lfns)})

    def getTransformation(self, transID, extraParams=False):
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformation(transID)
        res = self.trans.getType()
        return DIRAC.S_OK({"Type": res["Value"]})

    def getReplicas(self):
        return self.transReplicas

    def getFiles(self):
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        if condDict["TransformationID"] == self.transID and self.asIfProd:
            condDict["TransformationID"] = self.asIfProd
        if condDict["TransformationID"] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = [
            "CERN-RAW",
            "CNAF-RAW",
            "GRIDKA-RAW",
            "IN2P3-RAW",
            "SARA-RAW",
            "PIC-RAW",
            "RAL-RAW",
            "RRCKI-RAW",
        ]
        counters = []
        for se in possibleTargets:
            counters.append(({"UsedSE": se}, 0))
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        if transID == self.transID and self.asIfProd:
            # was an undefined name `asIfProd` in the original
            return self.transClient.getBookkeepingQuery(self.asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        if condDict["TransformationID"] == self.transID and self.asIfProd:
            condDict["TransformationID"] = self.asIfProd
        if condDict["TransformationID"] == self.transID:
            transRuns = []
            runs = condDict.get("RunNumber", [])
            if not runs and self.transFiles:
                res = self.bk.getFileMetadata([fileDict["LFN"] for fileDict in self.transFiles])
                if not res["OK"]:
                    return res
                runs = list(set(meta["RunNumber"] for meta in res["Value"]["Successful"].values()))
            for run in runs:
                transRuns.append({"RunNumber": run, "Status": "Active", "SelectedSite": None})
            return DIRAC.S_OK(transRuns)
        return self.transClient.getTransformationRuns(condDict)

    def getTransformationFiles(self, condDict=None):
        if condDict.get("TransformationID") == self.transID and self.asIfProd:
            condDict["TransformationID"] = self.asIfProd
        if condDict.get("TransformationID") == self.transID:
            transFiles = []
            if "Status" in condDict and "Unused" not in condDict["Status"]:
                return DIRAC.S_OK(transFiles)
            runs = None
            if "RunNumber" in condDict:
                runs = condDict["RunNumber"]
                if not isinstance(runs, list):
                    runs = [runs]
            for fileDict in self.transFiles:
                if not runs or fileDict["RunNumber"] in runs:
                    transFiles.append(
                        {"LFN": fileDict["LFN"], "Status": "Unused", "RunNumber": fileDict["RunNumber"]}
                    )
            return DIRAC.S_OK(transFiles)
        return self.transClient.getTransformationFiles(condDict=condDict)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """Update the transFiles with some parameters"""
        if transID == self.transID:
            for fileDict in self.transFiles:
                fileDict.update(lfnDict.get(fileDict["LFN"], {}))
            return S_OK()
        return self.transClient.setParameterToTransformationFiles(transID, lfnDict)

    def getTransformationFilesCount(self, transID, field, selection=None):
        if selection is None:
            selection = {}
        if transID == self.transID or selection.get("TransformationID") == self.transID:
            runs = selection.get("RunNumber")
            if runs and not isinstance(runs, list):
                runs = [runs]
            if field == "Status":
                counters = {"Unused": 0}
                for fileDict in self.transFiles:
                    if not runs or fileDict["RunNumber"] in runs:
                        counters["Unused"] += 1
            elif field == "RunNumber":
                counters = {}
                for fileDict in self.transFiles:
                    runID = fileDict["RunNumber"]
                    if not runs or runID in runs:
                        counters.setdefault(runID, 0)
                        counters[runID] += 1
            else:
                return DIRAC.S_ERROR("Not implemented for field " + field)
            counters["Total"] = sum(count for count in counters.values())
            return DIRAC.S_OK(counters)
        return self.transClient.getTransformationFilesCount(transID, field, selection=selection)

    def getTransformationRunStats(self, transIDs):
        counters = {}
        for transID in transIDs:
            if transID == self.transID:
                for fileDict in self.transFiles:
                    runID = fileDict["RunNumber"]
                    # the setdefault chain creates the nested dicts before assignment
                    counters[transID][runID]["Unused"] = (
                        counters.setdefault(transID, {}).setdefault(runID, {}).setdefault("Unused", 0) + 1
                    )
                for runID in counters[transID]:
                    counters[transID][runID]["Total"] = counters[transID][runID]["Unused"]
            else:
                res = self.transClient.getTransformationRunStats(transIDs)
                if res["OK"]:
                    counters.update(res["Value"])
                else:
                    return res
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        print("Preparing the plugin input data (%d files)" % len(lfns))
        transType = self.trans.getType()["Value"]  # renamed from `type` to avoid shadowing the builtin
        if not lfns:
            return (None, None)
        res = self.bk.getFileMetadata(lfns)
        if res["OK"]:
            files = []
            for lfn, metadata in res["Value"]["Successful"].items():
                runID = metadata.get("RunNumber", 0)
                runDict = {"RunNumber": runID, "LFN": lfn}
                files.append(runDict)
        else:
            print("Error getting BK metadata", res["Message"])
            return ([], {})
        replicas = {}
        startTime = time.time()
        for lfnChunk in breakListIntoChunks(lfns, 200):
            if transType.lower() in ("replication", "removal"):
                res = self.dm.getReplicas(lfnChunk, getUrl=False)
            else:
                res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False)
            if res["OK"]:
                for lfn, ses in res["Value"]["Successful"].items():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                print("Error getting replicas of %d files:" % len(lfns), res["Message"])
        print("Obtained replicas of %d files in %.3f seconds" % (len(lfns), time.time() - startTime))
        return (files, replicas)
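
# Illustrative sketch (not part of the original code): fakeClient is meant to
# stand in for the real TransformationClient when dry-running a transformation
# plugin. The transformation ID and LFNs below are hypothetical placeholders;
# `trans` is assumed to be an LHCbDIRAC Transformation object with its type
# and BK query already set.
def _exampleFakeClient(trans):
    """Minimal dry-run sketch for fakeClient."""
    lfns = ["/lhcb/LHCb/Collision17/RAW/example.raw"]  # hypothetical LFN
    fake = fakeClient(trans, transID=999999, lfns=lfns, asIfProd=0)
    # The plugin sees the same API as the real client, but Unused files and
    # run numbers are served from the locally prepared lists
    print(fake.getTransformationFiles({"TransformationID": 999999}))
    print(fake.getTransformationFilesCount(999999, "Status"))
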
class ConsistencyInspector(object):
    """ A class for handling some consistency checks """

    def __init__(self, interactive=True, transClient=None, dm=None, fc=None, dic=None):
        """ c'tor

        interactive: Data Manager (True) or DIRAC Agent (False)
        transClient: TransformationClient() if None, else transClient params
        dm: DataManager() if None, else dm params
        fc: FileCatalog() if None, else fc params

        One object for every production/directoriesList...
        """
        self.interactive = interactive
        self.transClient = TransformationClient() if transClient is None else transClient
        self.dm = dm if dm else DataManager()
        self.fc = fc if fc else FileCatalog()
        self.dic = dic if dic else DataIntegrityClient()
        self.dirac = Dirac()

        # Base elements from which to start the consistency checks
        self._prod = 0
        self._bkQuery = None
        self._fileType = []
        self._fileTypesExcluded = []
        self._lfns = []
        self.noLFC = False
        self.directories = []

        # Accessory elements
        self.runsList = []
        self.runStatus = None
        self.fromProd = None
        self.transType = ''
        self.cachedReplicas = {}

        self.prcdWithDesc = []
        self.prcdWithoutDesc = []
        self.prcdWithMultDesc = []
        self.nonPrcdWithDesc = []
        self.nonPrcdWithoutDesc = []
        self.nonPrcdWithMultDesc = []
        self.descForPrcdLFNs = []
        self.descForNonPrcdLFNs = []
        self.removedFiles = []

        self.absentLFNsInFC = []
        self.existLFNsNoSE = {}
        self.existLFNsBadReplicas = {}
        self.existLFNsBadFiles = {}
        self.existLFNsNotExisting = {}
        self.commonAncestors = {}
        self.multipleDescendants = {}
        self.ancestors = {}

        self._verbose = False

    def __logVerbose(self, msg, msg1=''):
        if self._verbose:
            # parenthesised so the '[ConsistencyChecks] ' prefix is kept even when
            # no production is set (the original ternary silently dropped it)
            newMsg = '[ConsistencyChecks] ' + (('[%s] ' % str(self.prod)) if self.prod else '')
            # Add that prefix to all lines of the message
            newMsg1 = msg1.replace('\n', '\n' + newMsg)
            newMsg += msg.replace('\n', '\n' + newMsg)
            gLogger.notice(newMsg, newMsg1)
        else:
            gLogger.verbose(msg, msg1)

    ################################################################################

    def checkFC2SE(self):
        # compareChecksum() returns an S_OK/S_ERROR structure: unwrap it instead
        # of indexing the wrapper directly (which raised KeyError)
        res = self.compareChecksum(self.lfns)
        if not res['OK']:
            gLogger.error("Error comparing checksums", res['Message'])
            return
        repDict = res['Value']
        self.existLFNsNoSE = repDict['MissingReplica']
        self.existLFNsNotExisting = repDict['MissingAllReplicas']
        self.existLFNsBadReplicas = repDict['SomeReplicasCorrupted']
        self.existLFNsBadFiles = repDict['AllReplicasCorrupted']

    def getReplicasPresence(self, lfns):
        """ get the replicas using the standard FileCatalog.getReplicas() """
        present = set()
        notPresent = set()

        chunkSize = 100
        printProgress = (len(lfns) > chunkSize)
        startTime = time.time()
        self.__write("Checking replicas for %d files%s" %
                     (len(lfns), (' (chunks of %d)' % chunkSize) if printProgress else '... '))
        for chunk in breakListIntoChunks(lfns, chunkSize):
            if printProgress:
                self.__write('.')
            for _ in xrange(1, 10):
                res = self.fc.getReplicas(chunk)
                if res['OK']:
                    present.update(res['Value']['Successful'])
                    self.cachedReplicas.update(res['Value']['Successful'])
                    notPresent.update(res['Value']['Failed'])
                    break
                else:
                    time.sleep(0.1)
        self.__write(' (%.1f seconds)\n' % (time.time() - startTime))

        if notPresent:
            self.__logVerbose("Files without replicas:", '\n'.join([''] + sorted(notPresent)))
        return list(present), list(notPresent)
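    # Illustrative note (not in the original code): the chunk-and-retry idiom
    # above -- up to 9 attempts per chunk with a short back-off -- is the pattern
    # this class uses for bulk catalog calls. A minimal standalone sketch:
    #
    #   for chunk in breakListIntoChunks(lfns, 100):
    #       for _ in xrange(1, 10):          # at most 9 attempts per chunk
    #           res = catalog.getReplicas(chunk)
    #           if res['OK']:
    #               break                    # chunk done
    #           time.sleep(0.1)              # transient failure: retry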
""" dirs = {} present = [] notPresent = [] compare = True for lfn in lfns: dirN = os.path.dirname(lfn) if lfn == dirN + '/': compare = False dirs.setdefault(dirN, []).append(lfn) if compare: self.__write( "Checking File Catalog for %d files from %d directories " % (len(lfns), len(dirs))) else: self.__write("Getting files from %d directories " % len(dirs)) startTime = time.time() for dirN in sorted(dirs): startTime1 = time.time() self.__write('.') lfnsFound = self._getFilesFromDirectoryScan(dirN) gLogger.verbose("Obtained %d files in %.1f seconds" % (len(lfnsFound), time.time() - startTime1)) if compare: pr, notPr = self.__compareLFNLists(dirs[dirN], lfnsFound) notPresent += notPr present += pr else: present += lfnsFound self.__write(' (%.1f seconds)\n' % (time.time() - startTime)) gLogger.info("Found %d files with replicas and %d without" % (len(present), len(notPresent))) return present, notPresent ################################################################################ def __compareLFNLists(self, lfns, lfnsFound): """ return files in both lists and files in lfns and not in lfnsFound """ present = [] notPresent = lfns startTime = time.time() self.__logVerbose("Comparing list of %d LFNs with second list of %d" % (len(lfns), len(lfnsFound))) if lfnsFound: setLfns = set(lfns) setLfnsFound = set(lfnsFound) present = list(setLfns & setLfnsFound) notPresent = list(setLfns - setLfnsFound) self.__logVerbose("End of comparison: %.1f seconds" % (time.time() - startTime)) return present, notPresent def _getFilesFromDirectoryScan(self, dirs): """ calls dm.getFilesFromDirectory """ level = gLogger.getLevel() gLogger.setLevel('FATAL') res = self.dm.getFilesFromDirectory(dirs) gLogger.setLevel(level) if not res['OK']: if 'No such file or directory' not in res['Message']: gLogger.error( "Error getting files from directories %s:" % dirs, res['Message']) return [] if res['Value']: lfnsFound = res['Value'] else: lfnsFound = [] return lfnsFound ################################################################################ def _getTSFiles(self): """ Helper function - get files from the TS """ selectDict = {'TransformationID': self.prod} if self._lfns: selectDict['LFN'] = self._lfns elif self.runStatus and self.fromProd: res = self.transClient.getTransformationRuns({ 'TransformationID': self.fromProd, 'Status': self.runStatus }) if not res['OK']: gLogger.error("Failed to get runs for transformation %d" % self.prod) else: if res['Value']: self.runsList.extend([ run['RunNumber'] for run in res['Value'] if run['RunNumber'] not in self.runsList ]) gLogger.notice("%d runs selected" % len(res['Value'])) elif not self.runsList: gLogger.notice("No runs selected, check completed") DIRAC.exit(0) if not self._lfns and self.runsList: selectDict['RunNumber'] = self.runsList res = self.transClient.getTransformation(self.prod) if not res['OK']: gLogger.error("Failed to find transformation %s" % self.prod) return [], [], [] status = res['Value']['Status'] if status not in ('Active', 'Stopped', 'Completed', 'Idle'): gLogger.notice( "Transformation %s in status %s, will not check if files are processed" % (self.prod, status)) processedLFNs = [] nonProcessedLFNs = [] nonProcessedStatuses = [] if self._lfns: processedLFNs = self._lfns else: res = self.transClient.getTransformationFiles(selectDict) if not res['OK']: gLogger.error( "Failed to get files for transformation %d" % self.prod, res['Message']) return [], [], [] else: processedLFNs = [ item['LFN'] for item in res['Value'] if item['Status'] == 'Processed' ] 
    ################################################################################

    def _getTSFiles(self):
        """ Helper function - get files from the TS """
        selectDict = {'TransformationID': self.prod}
        if self._lfns:
            selectDict['LFN'] = self._lfns
        elif self.runStatus and self.fromProd:
            res = self.transClient.getTransformationRuns({'TransformationID': self.fromProd,
                                                          'Status': self.runStatus})
            if not res['OK']:
                gLogger.error("Failed to get runs for transformation %d" % self.prod)
            else:
                if res['Value']:
                    self.runsList.extend([run['RunNumber'] for run in res['Value']
                                          if run['RunNumber'] not in self.runsList])
                    gLogger.notice("%d runs selected" % len(res['Value']))
                elif not self.runsList:
                    gLogger.notice("No runs selected, check completed")
                    DIRAC.exit(0)
        if not self._lfns and self.runsList:
            selectDict['RunNumber'] = self.runsList

        res = self.transClient.getTransformation(self.prod)
        if not res['OK']:
            gLogger.error("Failed to find transformation %s" % self.prod)
            return [], [], []
        status = res['Value']['Status']
        if status not in ('Active', 'Stopped', 'Completed', 'Idle'):
            gLogger.notice("Transformation %s in status %s, will not check if files are processed" %
                           (self.prod, status))
        processedLFNs = []
        nonProcessedLFNs = []
        nonProcessedStatuses = []
        if self._lfns:
            processedLFNs = self._lfns
        else:
            res = self.transClient.getTransformationFiles(selectDict)
            if not res['OK']:
                gLogger.error("Failed to get files for transformation %d" % self.prod, res['Message'])
                return [], [], []
            else:
                processedLFNs = [item['LFN'] for item in res['Value'] if item['Status'] == 'Processed']
                nonProcessedLFNs = [item['LFN'] for item in res['Value'] if item['Status'] != 'Processed']
                nonProcessedStatuses = list(set(item['Status'] for item in res['Value']
                                                if item['Status'] != 'Processed'))
        return processedLFNs, nonProcessedLFNs, nonProcessedStatuses

    def __getDirectories(self):
        """ get the directories where to look into
        (they are either given, or taken from the transformation ID) """
        if self.directories:
            directories = []
            printout = False
            for directory in self.directories:
                if not directory.endswith('...'):
                    directories.append(directory)
                else:
                    printout = True
                    topDir = os.path.dirname(directory)
                    res = self.fc.listDirectory(topDir)
                    if not res['OK']:
                        return S_ERROR(errno.ENOENT, res['Message'])
                    else:
                        matchDir = directory.split('...')[0]
                        directories += [d for d in res['Value']['Successful'].get(topDir, {}).get('SubDirs', [])
                                        if d.startswith(matchDir)]
            if printout:
                gLogger.always('Expanded list of %d directories:\n%s' %
                               (len(directories), '\n'.join(directories)))
            return directories
        else:
            return S_ERROR(errno.ENOENT, 'Need to specify the directories')

    ################################################################################

    def __write(self, text):
        """ print progress information only when running interactively """
        if self.interactive:
            sys.stdout.write(text)
            sys.stdout.flush()

    ################################################################################

    def _selectByFileType(self, lfnDict, fileTypes=None, fileTypesExcluded=None):
        """ Select only those files from the values of lfnDict that have a certain type """
        if not lfnDict:
            return {}
        if not fileTypes:
            fileTypes = self.fileType
        if not fileTypesExcluded:
            fileTypesExcluded = self.fileTypesExcluded
        else:
            fileTypesExcluded += [ft for ft in self.fileTypesExcluded if ft not in fileTypesExcluded]
        # lfnDict is a dictionary of dictionaries including the metadata; copy the
        # inner dictionaries as well so the caller's input is not modified (the
        # original made a shallow copy although the comment promised a deep one)
        ancDict = dict((ancestor, dict(descendants)) for ancestor, descendants in lfnDict.iteritems())
        if fileTypes == ['']:
            fileTypes = []
        # and loop on the original dictionaries
        for ancestor in lfnDict:
            for desc in lfnDict[ancestor].keys():
                ft = lfnDict[ancestor][desc]['FileType']
                if ft in fileTypesExcluded or (fileTypes and ft not in fileTypes):
                    ancDict[ancestor].pop(desc)
            if not len(ancDict[ancestor]):
                ancDict.pop(ancestor)
        return ancDict

    @staticmethod
    def _getFileTypesCount(lfnDict):
        """ return file types count """
        ft_dict = {}
        for ancestor in lfnDict:
            t_dict = {}
            for desc in lfnDict[ancestor]:
                ft = lfnDict[ancestor][desc]['FileType']
                t_dict[ft] = t_dict.setdefault(ft, 0) + 1
            ft_dict[ancestor] = t_dict
        return ft_dict

    def __getLFNsFromFC(self):
        """ get the LFNs to check: from the directories if no LFNs were given explicitly """
        if not self.lfns:
            directories = []
            for dirName in self.__getDirectories():
                if not dirName.endswith('/'):
                    dirName += '/'
                directories.append(dirName)
            present, notPresent = self.getReplicasPresenceFromDirectoryScan(directories)
        else:
            present, notPresent = self.getReplicasPresence(self.lfns)
        return present, notPresent
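    # Illustrative note (not in the original code): a trailing '...' in
    # self.directories acts as a prefix wildcard, expanded against the File
    # Catalog by __getDirectories(). Paths are hypothetical:
    #
    #   ci = ConsistencyInspector()
    #   ci.directories = ['/lhcb/MC/2012/SIM/000123...']
    #   # __getDirectories() lists /lhcb/MC/2012/SIM and keeps the SubDirs
    #   # whose names start with '/lhcb/MC/2012/SIM/000123'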
    def compareChecksum(self, lfns):
        """ compare the checksum of the file in the FC and the checksum of the physical replicas.
        Returns a dictionary containing 5 sub-dictionaries: files with some
        replicas missing ('MissingReplica'), files with all replicas missing
        ('MissingAllReplicas'), files with all replicas corrupted
        ('AllReplicasCorrupted'), files with some replicas corrupted and at
        least one good replica ('SomeReplicasCorrupted'), and files with no
        replica registered in the FC at all ('NoReplicas')
        """
        retDict = {'AllReplicasCorrupted': {},
                   'SomeReplicasCorrupted': {},
                   'MissingReplica': {},
                   'MissingAllReplicas': {},
                   'NoReplicas': {}}

        chunkSize = 100
        replicas = {}
        setLfns = set(lfns)
        cachedLfns = setLfns & set(self.cachedReplicas)
        for lfn in cachedLfns:
            replicas[lfn] = self.cachedReplicas[lfn]
        lfnsLeft = list(setLfns - cachedLfns)
        if lfnsLeft:
            self.__write("Get replicas for %d files (chunks of %d): " % (len(lfnsLeft), chunkSize))
            for lfnChunk in breakListIntoChunks(lfnsLeft, chunkSize):
                self.__write('.')
                replicasRes = self.fc.getReplicas(lfnChunk)
                if not replicasRes['OK']:
                    gLogger.error("error: %s" % replicasRes['Message'])
                    return S_ERROR(errno.ENOENT, "error: %s" % replicasRes['Message'])
                replicasRes = replicasRes['Value']
                if replicasRes['Failed']:
                    retDict['NoReplicas'].update(replicasRes['Failed'])
                replicas.update(replicasRes['Successful'])

        self.__write("Get FC metadata for %d files to be checked: " % len(lfns))
        metadata = {}
        for lfnChunk in breakListIntoChunks(replicas.keys(), chunkSize):
            self.__write('.')
            res = self.fc.getFileMetadata(lfnChunk)
            if not res['OK']:
                return S_ERROR(errno.ENOENT, "error %s" % res['Message'])
            metadata.update(res['Value']['Successful'])

        gLogger.notice("Check existence and compare checksum file by file...")
        csDict = {}
        seFiles = {}
        # Reverse the LFN->SE dictionary
        nReps = 0
        for lfn in replicas:
            csDict.setdefault(lfn, {})['LFCChecksum'] = metadata.get(lfn, {}).get('Checksum')
            for se in replicas[lfn]:
                seFiles.setdefault(se, []).append(lfn)
                nReps += 1

        gLogger.notice('Getting checksum of %d replicas in %d SEs' % (nReps, len(seFiles)))
        checkSum = {}
        lfnNotExisting = {}
        lfnNoInfo = {}
        logLevel = gLogger.getLevel()
        gLogger.setLevel('FATAL')
        for num, se in enumerate(sorted(seFiles)):
            self.__write('\n%d. At %s (%d files): ' % (num, se, len(seFiles[se])))
            oSe = StorageElement(se)
            notFound = 0
            for surlChunk in breakListIntoChunks(seFiles[se], chunkSize):
                self.__write('.')
                metadata = oSe.getFileMetadata(surlChunk)
                if not metadata['OK']:
                    gLogger.error("Error: getFileMetadata returns %s. Ignore those replicas" % (metadata['Message']))
                    # Remove from the list of replicas as we don't know whether they are OK or not
                    for lfn in seFiles[se]:
                        lfnNoInfo.setdefault(lfn, []).append(se)
                else:
                    metadata = metadata['Value']
                    notFound += len(metadata['Failed'])
                    for lfn in metadata['Failed']:
                        lfnNotExisting.setdefault(lfn, []).append(se)
                    for lfn in metadata['Successful']:
                        checkSum.setdefault(lfn, {})[se] = metadata['Successful'][lfn]['Checksum']
            if notFound:
                gLogger.error('%d files not found' % notFound)
        gLogger.setLevel(logLevel)

        gLogger.notice('Verifying checksum of %d files' % len(replicas))
        for lfn in replicas:
            # get the lfn checksum from the catalog
            replicaDict = replicas[lfn]
            oneGoodReplica = False
            allGoodReplicas = True
            lfcChecksum = csDict[lfn].pop('LFCChecksum')
            for se in replicaDict:
                # If the replica doesn't exist, skip the check
                if se in lfnNotExisting.get(lfn, []):
                    allGoodReplicas = False
                    continue
                if se in lfnNoInfo.get(lfn, []):
                    # If there is no info, a priori it could be good
                    oneGoodReplica = True
                    continue
                # get the SE metadata and compare the checksum
                surlChecksum = checkSum.get(lfn, {}).get(se, '')
                if not surlChecksum or not compareAdler(lfcChecksum, surlChecksum):
                    # the catalog checksum does not match the storage checksum
                    csDict[lfn][se] = {'PFNChecksum': surlChecksum}
                    gLogger.info("ERROR!! checksum mismatch at %s for LFN %s: LFC checksum: %s, PFN checksum: %s" %
                                 (se, lfn, lfcChecksum, surlChecksum))
                    allGoodReplicas = False
                else:
                    oneGoodReplica = True
            if not oneGoodReplica:
                if lfn in lfnNotExisting:
                    gLogger.info("=> All replicas are missing", lfn)
                    retDict['MissingAllReplicas'][lfn] = 'All'
                else:
                    gLogger.info("=> All replicas have bad checksum", lfn)
                    retDict['AllReplicasCorrupted'][lfn] = csDict[lfn]
            elif not allGoodReplicas:
                if lfn in lfnNotExisting:
                    gLogger.info("=> At least one replica missing", lfn)
                    retDict['MissingReplica'][lfn] = lfnNotExisting[lfn]
                else:
                    gLogger.info("=> At least one replica with good Checksum", lfn)
                    retDict['SomeReplicasCorrupted'][lfn] = csDict[lfn]

        return S_OK(retDict)
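    # Illustrative note (not in the original code): how the five categories
    # returned by compareChecksum() are typically consumed (hypothetical LFN):
    #
    #   ci = ConsistencyInspector()
    #   res = ci.compareChecksum(['/lhcb/data/2012/RAW/file1.raw'])
    #   if res['OK']:
    #       for lfn, seInfo in res['Value']['SomeReplicasCorrupted'].iteritems():
    #           print lfn, '-> bad at', seInfo.keys()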
    ################################################################################
    # properties

    def set_prod(self, value):
        """ Setter """
        if value:
            value = int(value)
            res = self.transClient.getTransformation(value, extraParams=False)
            if not res['OK']:
                # the S_ERROR built here was silently discarded: at least log the failure
                gLogger.error("Couldn't find transformation %d: %s" % (value, res['Message']))
            else:
                self.transType = res['Value']['Type']
                if self.interactive:
                    gLogger.info("Production %d has type %s" % (value, self.transType))
        else:
            value = 0
        self._prod = value

    def get_prod(self):
        """ Getter """
        return self._prod

    prod = property(get_prod, set_prod)

    def set_fileType(self, value):
        """ Setter """
        self._fileType = [ft.upper() for ft in value]

    def get_fileType(self):
        """ Getter """
        return self._fileType

    fileType = property(get_fileType, set_fileType)

    def set_fileTypesExcluded(self, value):
        """ Setter """
        self._fileTypesExcluded = [ft.upper() for ft in value]

    def get_fileTypesExcluded(self):
        """ Getter """
        return self._fileTypesExcluded

    fileTypesExcluded = property(get_fileTypesExcluded, set_fileTypesExcluded)

    def set_lfns(self, value):
        """ Setter """
        if isinstance(value, basestring):
            value = [value]
        value = [v.replace(' ', '').replace('//', '/') for v in value]
        self._lfns = value

    def get_lfns(self):
        """ Getter """
        return self._lfns

    lfns = property(get_lfns, set_lfns)
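    # Illustrative note (not in the original code): the properties above
    # normalise whatever is assigned to them. Hypothetical values:
    #
    #   ci = ConsistencyInspector()
    #   ci.prod = '1234'                   # cast to int, transformation type looked up
    #   ci.fileType = ['dst']              # stored upper-cased: ['DST']
    #   ci.lfns = '/lhcb//data/file.dst'   # '//' collapsed, wrapped in a list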
""" gLogger.info("-" * 40) gLogger.info("Performing the FC->SE check") gLogger.info("-" * 40) if isinstance(lfns, basestring): lfns = [lfns] res = self._getCatalogMetadata(lfns) if not res['OK']: return res catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res['Value'] res = self._getCatalogReplicas(catalogMetadata.keys()) if not res['OK']: return res replicas, _zeroReplicaFiles = res['Value'] res = self.checkPhysicalFiles(replicas, catalogMetadata) if not res['OK']: return res resDict = { 'CatalogMetadata': catalogMetadata, 'CatalogReplicas': replicas } return S_OK(resDict) def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None): """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements. """ #FIXME: we better use the compareChecksum function instead of this one! or maybe directly checkFC2SE gLogger.info("-" * 40) gLogger.info("Performing the LFC->SE check") gLogger.info("-" * 40) seLfns = {} for lfn, replicaDict in replicas.iteritems(): for se, _url in replicaDict.iteritems(): if (ses) and (se not in ses): continue seLfns.setdefault(se, []).append(lfn) gLogger.info('%s %s' % ('Storage Element'.ljust(20), 'Replicas'.rjust(20))) for se in sorted(seLfns): files = len(seLfns[se]) gLogger.info('%s %s' % (se.ljust(20), str(files).rjust(20))) lfns = seLfns[se] sizeMismatch = [] res = self.__checkPhysicalFileMetadata(lfns, se) if not res['OK']: gLogger.error('Failed to get physical file metadata.', res['Message']) return res for lfn, metadata in res['Value'].iteritems(): if lfn in catalogMetadata: if metadata['Size'] != catalogMetadata[lfn][ 'Size']: # and ( metadata['Size'] != 0 ): sizeMismatch.append((lfn, 'deprecatedUrl', se, 'CatalogPFNSizeMismatch')) if sizeMismatch: self.dic.reportProblematicReplicas(sizeMismatch, se, 'CatalogPFNSizeMismatch') return S_OK() def __checkPhysicalFileMetadata(self, lfns, se): """ Check obtain the physical file metadata and check the files are available """ gLogger.info('Checking the integrity of %s physical files at %s' % (len(lfns), se)) res = StorageElement(se).getFileMetadata(lfns) if not res['OK']: gLogger.error('Failed to get metadata for lfns.', res['Message']) return res lfnMetadataDict = res['Value']['Successful'] # If the replicas are completely missing missingReplicas = [] for lfn, reason in res['Value']['Failed'].iteritems(): if re.search('File does not exist', reason): missingReplicas.append( (lfn, 'deprecatedUrl', se, 'PFNMissing')) if missingReplicas: self.dic.reportProblematicReplicas(missingReplicas, se, 'PFNMissing') lostReplicas = [] unavailableReplicas = [] zeroSizeReplicas = [] # If the files are not accessible for lfn, lfnMetadata in lfnMetadataDict.iteritems(): if lfnMetadata['Lost']: lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost')) if lfnMetadata['Unavailable']: unavailableReplicas.append( (lfn, 'deprecatedUrl', se, 'PFNUnavailable')) if not lfnMetadata['Size']: zeroSizeReplicas.append( (lfn, 'deprecatedUrl', se, 'PFNZeroSize')) if lostReplicas: self.dic.reportProblematicReplicas(lostReplicas, se, 'PFNLost') if unavailableReplicas: self.dic.reportProblematicReplicas(unavailableReplicas, se, 'PFNUnavailable') if zeroSizeReplicas: self.dic.reportProblematicReplicas(zeroSizeReplicas, se, 'PFNZeroSize') gLogger.info( 'Checking the integrity of physical files at %s complete' % se) return S_OK(lfnMetadataDict) ########################################################################## # # This section contains the specific methods for SE->File 
    ##########################################################################
    #
    # This section contains the specific methods for SE->File Catalog checks
    #

    def storageDirectoryToCatalog(self, lfnDir, storageElement):
        """ This obtains the files found on the storage element in the supplied
        directories and determines whether they exist in the catalog and checks
        their metadata elements
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the SE->FC check at %s" % storageElement)
        gLogger.info("-" * 40)
        if isinstance(lfnDir, basestring):
            lfnDir = [lfnDir]
        res = self.getStorageDirectoryContents(lfnDir, storageElement)
        if not res['OK']:
            return res
        storageFileMetadata = res['Value']
        if storageFileMetadata:
            return self.__checkCatalogForSEFiles(storageFileMetadata, storageElement)
        return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}})

    def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
        gLogger.info('Checking %s storage files exist in the catalog' % len(storageMetadata))

        res = self.fc.getReplicas(storageMetadata)
        if not res['OK']:
            gLogger.error("Failed to get replicas for LFN", res['Message'])
            return res
        failedLfns = res['Value']['Failed']
        successfulLfns = res['Value']['Successful']
        notRegisteredLfns = []

        for lfn in storageMetadata:
            if lfn in failedLfns:
                if 'No such file or directory' in failedLfns[lfn]:
                    notRegisteredLfns.append((lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))
                    failedLfns.pop(lfn)
            elif storageElement not in successfulLfns[lfn]:
                notRegisteredLfns.append((lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))

        if notRegisteredLfns:
            self.dic.reportProblematicReplicas(notRegisteredLfns, storageElement, 'LFNNotRegistered')
        if failedLfns:
            return S_ERROR(errno.ENOENT, 'Failed to obtain replicas')

        # For the LFNs found to be registered, obtain the file metadata from the
        # catalog and verify it against the storage metadata
        res = self._getCatalogMetadata(storageMetadata)
        if not res['OK']:
            return res
        catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res['Value']
        sizeMismatch = []
        for lfn, lfnCatalogMetadata in catalogMetadata.iteritems():
            lfnStorageMetadata = storageMetadata[lfn]
            if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and (lfnStorageMetadata['Size'] != 0):
                sizeMismatch.append((lfn, 'deprecatedUrl', storageElement, 'CatalogPFNSizeMismatch'))
        if sizeMismatch:
            self.dic.reportProblematicReplicas(sizeMismatch, storageElement, 'CatalogPFNSizeMismatch')
        gLogger.info('Checking storage files exist in the catalog complete')
        resDict = {'CatalogMetadata': catalogMetadata, 'StorageMetadata': storageMetadata}
        return S_OK(resDict)
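    # Illustrative note (not in the original code): a typical SE->FC invocation,
    # with a hypothetical directory and SE name:
    #
    #   res = ci.storageDirectoryToCatalog('/lhcb/data/2012/RAW/FULL/', 'CERN-RAW')
    #   if res['OK']:
    #       print len(res['Value']['StorageMetadata']), 'files seen on the SE'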
    def getStorageDirectoryContents(self, lfnDir, storageElement):
        """ This takes the supplied lfn directories and recursively obtains the
        files in the supplied storage element
        """
        gLogger.info('Obtaining the contents for %s directories at %s' % (len(lfnDir), storageElement))

        se = StorageElement(storageElement)
        res = se.exists(lfnDir)
        if not res['OK']:
            gLogger.error("Failed to obtain existence of directories", res['Message'])
            return res
        for directory, error in res['Value']['Failed'].iteritems():
            gLogger.error('Failed to determine existence of directory', '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR(errno.ENOENT, 'Failed to determine existence of directory')
        directoryExists = res['Value']['Successful']
        activeDirs = []
        for directory in sorted(directoryExists):
            exists = directoryExists[directory]
            if exists:
                activeDirs.append(directory)

        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = se.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents', res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error('Failed to get directory contents',
                              '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
                return S_ERROR(errno.ENOENT, res['Value']['Failed'][currentDir])
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(se.getLFNFromURL(dirContents['SubDirs']).get('Value', {}).get('Successful', []))
                fileURLMetadata = dirContents['Files']
                fileMetadata = {}
                res = se.getLFNFromURL(fileURLMetadata)
                if not res['OK']:
                    gLogger.error('Failed to get directory content LFNs', res['Message'])
                    return res
                for url, error in res['Value']['Failed'].iteritems():
                    gLogger.error("Failed to get LFN for URL", "%s %s" % (url, error))
                if res['Value']['Failed']:
                    return S_ERROR(errno.ENOENT, "Failed to get LFNs for PFNs")
                urlLfns = res['Value']['Successful']
                for urlLfn, lfn in urlLfns.iteritems():
                    fileMetadata[lfn] = fileURLMetadata[urlLfn]
                allFiles.update(fileMetadata)

        zeroSizeFiles = []
        for lfn in sorted(allFiles):
            if os.path.basename(lfn) == 'dirac_directory':
                allFiles.pop(lfn)
            else:
                metadata = allFiles[lfn]
                if not metadata['Size']:
                    zeroSizeFiles.append((lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize'))
        if zeroSizeFiles:
            self.dic.reportProblematicReplicas(zeroSizeFiles, storageElement, 'PFNZeroSize')

        gLogger.info('Obtained a total of %s files for directories at %s' % (len(allFiles), storageElement))
        return S_OK(allFiles)

    def _getCatalogDirectoryContents(self, lfnDirs):
        """ Obtain the contents of the supplied directories, recursively """

        def _getDirectoryContent(directory):
            """ Inner function: recursively scan a directory, returns list of LFNs """
            filesInDirectory = {}

            gLogger.debug("Examining %s" % directory)
            res = self.fc.listDirectory(directory)
            if not res['OK']:
                gLogger.error('Failed to get directory contents', res['Message'])
                return res
            if directory in res['Value']['Failed']:
                gLogger.error('Failed to get directory content',
                              '%s %s' % (directory, res['Value']['Failed'][directory]))
                return S_ERROR('Failed to get directory content')
            if directory not in res['Value']['Successful']:
                return S_ERROR('Directory not existing?')

            # first, adding the files found in the current directory
            gLogger.debug("Files in %s: %d" % (directory, len(res['Value']['Successful'][directory]['Files'])))
            filesInDirectory.update(res['Value']['Successful'][directory]['Files'])

            # then, looking for subDirectories content
            if res['Value']['Successful'][directory]['SubDirs']:
                for l_dir in res['Value']['Successful'][directory]['SubDirs']:
                    # recursion here
                    subDirContent = _getDirectoryContent(l_dir)
                    if not subDirContent['OK']:
                        return subDirContent
                    else:
                        filesInDirectory.update(subDirContent['Value'])

            return S_OK(filesInDirectory)

        gLogger.info('Obtaining the catalog contents for %d directories' % len(lfnDirs))

        allFiles = {}
        for lfnDir in lfnDirs:
            dirContent = _getDirectoryContent(lfnDir)
            if not dirContent['OK']:
                return dirContent
            else:
                gLogger.debug("Content of directory %s: %d files" % (lfnDir, len(dirContent['Value'])))
                allFiles.update(dirContent['Value'])

        gLogger.debug("Content of directories examined: %d files" % len(allFiles))

        replicas = self.fc.getReplicas(list(allFiles))
        if not replicas['OK']:
            return replicas
        if replicas['Value']['Failed']:
            return S_ERROR("Failures in replicas discovery")

        return S_OK({'Metadata': allFiles, 'Replicas': replicas['Value']['Successful']})
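    # Illustrative note (not in the original code): getStorageDirectoryContents()
    # walks the SE namespace breadth-first through a work queue (activeDirs),
    # while _getCatalogDirectoryContents() recurses depth-first through the FC;
    # both return {lfn: metadata} maps keyed by LFN. The queue idiom, in short:
    #
    #   queue = ['/lhcb/data/2012/RAW/']   # hypothetical root directory
    #   while queue:
    #       d = queue.pop(0)               # FIFO, hence breadth-first
    #       # ...list d, extend queue with its SubDirs, collect its Files...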
    def _getCatalogReplicas(self, lfns):
        """ Obtain the file replicas from the catalog while checking that there are replicas """
        gLogger.info('Obtaining the replicas for %s files' % len(lfns))

        zeroReplicaFiles = []
        res = self.fc.getReplicas(lfns, allStatus=True)
        if not res['OK']:
            gLogger.error('Failed to get catalog replicas', res['Message'])
            return res
        allReplicas = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].iteritems():
            if re.search('File has zero replicas', error):
                zeroReplicaFiles.append(lfn)
        gLogger.info('Obtaining the replicas for files complete')
        return S_OK((allReplicas, zeroReplicaFiles))

    def _getCatalogMetadata(self, lfns):
        """ Obtain the file metadata from the catalog while checking the files exist """
        if not lfns:
            # return the same 3-tuple shape as below: callers unpack the value,
            # and the original bare S_OK({}) broke that unpacking
            return S_OK(({}, [], []))
        gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns))

        missingCatalogFiles = []
        # kept for interface compatibility: this implementation never fills it
        zeroSizeFiles = []
        res = self.fc.getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get catalog metadata', res['Message'])
            return res
        allMetadata = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].iteritems():
            if re.search('No such file or directory', error):
                missingCatalogFiles.append(lfn)
        gLogger.info('Obtaining the catalog metadata complete')
        return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))
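# Illustrative sketch (not part of the original module): a typical interactive
# FC->SE consistency session. The production number and LFNs are hypothetical.
#
#   ci = ConsistencyInspector(interactive=True)
#   ci.prod = 1234
#   ci.lfns = ['/lhcb/data/2012/RAW/file1.raw', '/lhcb/data/2012/RAW/file2.raw']
#   ci.checkFC2SE()
#   print 'lost everywhere:', ci.existLFNsNotExisting.keys()
#   print 'corrupted everywhere:', ci.existLFNsBadFiles.keys()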
else:
    ids = args[0].split(",")
    idList = []
    for idStr in ids:  # renamed from "id", which shadowed the builtin
        r = idStr.split(':')
        if len(r) > 1:
            # a "first:last" range of transformation IDs
            for i in xrange(int(r[0]), int(r[1]) + 1):
                idList.append(i)
        else:
            idList.append(int(r[0]))
    from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    transClient = TransformationClient()
    for transID in idList:
        res = transClient.getTransformationRuns({'TransformationID': transID})
        if not res['OK']:
            gLogger.fatal("Error getting runs for transformation %s" % transID, res['Message'])
            DIRAC.exit(1)
        runs = res['Value']
        # sort by run number (replaces the cmp-based integer-difference comparator)
        runs.sort(key=lambda run: int(run['RunNumber']))
        if not runList:
            if active:
                # Flush all Active runs in that transformation
                toBeFlushed = [run['RunNumber'] for run in runs if run['Status'] != 'Flush']
            else: