def main():
    """Print the LFN of every file attached to one transformation.

    The transformation ID is taken from the single positional command-line
    argument.  When the number of positional arguments is not exactly one,
    ``Script.showHelp(exitCode=1)`` is called.  When the query to the
    transformation client fails, the error message is logged and
    ``DIRAC.exit(2)`` is called.
    """
    Script.parseCommandLine()

    # This import is deliberately kept after the command line parsing,
    # exactly as in the original ordering of statements.
    from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient

    positional = Script.getPositionalArgs()
    if len(positional) != 1:
        Script.showHelp(exitCode=1)

    client = TransformationClient()
    query = {"TransformationID": positional[0]}
    result = client.getTransformationFiles(query)
    if not result["OK"]:
        DIRAC.gLogger.error(result["Message"])
        DIRAC.exit(2)

    # One LFN per output line
    for entry in result["Value"]:
        DIRAC.gLogger.notice(entry["LFN"])
def main():
    """List the LFNs of the files known to a given transformation.

    A single positional argument (``transID``) is registered and read from
    the command line.  If the count of positional arguments differs from
    one, ``Script.showHelp(exitCode=1)`` is invoked; if the transformation
    client reports a failure, its message is logged as an error and
    ``DIRAC.exit(2)`` is invoked.
    """
    # A registered argument gets its description added to the help menu
    Script.registerArgument("transID: transformation ID")
    _switches, positionalArgs = Script.parseCommandLine()

    # Import performed only once the command line has been parsed
    # (statement order preserved from the original).
    from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient

    if len(positionalArgs) != 1:
        Script.showHelp(exitCode=1)

    transClient = TransformationClient()
    selection = {"TransformationID": positionalArgs[0]}
    reply = transClient.getTransformationFiles(selection)
    if not reply["OK"]:
        DIRAC.gLogger.error(reply["Message"])
        DIRAC.exit(2)

    for fileRecord in reply["Value"]:
        DIRAC.gLogger.notice(fileRecord["LFN"])
class ConsistencyInspector(object):
    """Helper class running consistency checks between the Transformation
    System, the File Catalog and the Storage Elements.

    Methods that talk to DIRAC services return the usual DIRAC result
    structure built with ``S_OK`` / ``S_ERROR`` unless stated otherwise.
    """

    def __init__(self, interactive=True, transClient=None, dm=None, fc=None, dic=None):
        """c'tor

        One object is meant to be used for every production / list of
        directories.

        :param interactive: when true, progress text is written to stdout
            (see ``__write``) and the production type is logged by ``set_prod``
        :param transClient: transformation client to use; a
            ``TransformationClient()`` is created when None
        :param dm: data manager to use; a ``DataManager()`` is created when falsy
        :param fc: file catalog to use; a ``FileCatalog()`` is created when falsy
        :param dic: data integrity client to use; a ``DataIntegrityClient()``
            is created when falsy
        """
        self.interactive = interactive
        self.transClient = TransformationClient() if transClient is None else transClient
        self.dataManager = dm if dm else DataManager()
        self.fileCatalog = fc if fc else FileCatalog()
        self.dic = dic if dic else DataIntegrityClient()
        self.dirac = Dirac()

        # Base elements from which to start the consistency checks
        self._prod = 0
        self._bkQuery = None
        self._fileType = []
        self._fileTypesExcluded = []
        self._lfns = []
        self.directories = []

        # Accessory elements
        self.runsList = []
        self.runStatus = None
        self.fromProd = None
        self.transType = ""
        # LFN -> replicas, filled by getReplicasPresence and reused by compareChecksum
        self.cachedReplicas = {}

        self.prcdWithDesc = []
        self.prcdWithoutDesc = []
        self.prcdWithMultDesc = []
        self.nonPrcdWithDesc = []
        self.nonPrcdWithoutDesc = []
        self.nonPrcdWithMultDesc = []
        self.descForPrcdLFNs = []
        self.descForNonPrcdLFNs = []
        self.removedFiles = []

        self.absentLFNsInFC = []
        # The four dictionaries below are filled by checkFC2SE
        self.existLFNsNoSE = {}
        self.existLFNsBadReplicas = {}
        self.existLFNsBadFiles = {}
        self.existLFNsNotExisting = {}
        self.commonAncestors = {}
        self.multipleDescendants = {}
        self.ancestors = {}

        self._verbose = False

    def __logVerbose(self, msg, msg1=""):
        """Logger helper for verbose information.

        When ``self._verbose`` is set the message is sent at NOTICE level with
        a prefix repeated on every line; otherwise it is sent at VERBOSE level
        unchanged.

        :param msg: main message
        :param msg1: additional message, passed as second argument to the logger
        """
        if self._verbose:
            # The conditional must only apply to the production part: without the
            # inner parentheses the whole prefix was dropped whenever prod was 0.
            prefix = "[ConsistencyChecks] " + (("[%s] " % str(self.prod)) if self.prod else "")
            # Add that prefix to all lines of the message
            newMsg1 = msg1.replace("\n", "\n" + prefix)
            newMsg = prefix + msg.replace("\n", "\n" + prefix)
            gLogger.notice(newMsg, newMsg1)
        else:
            gLogger.verbose(msg, msg1)

    ##########################################################################

    def checkFC2SE(self):
        """Check files vs SE information.

        Runs ``compareChecksum`` on ``self.lfns`` and stores the resulting
        categories in ``existLFNsNoSE``, ``existLFNsNotExisting``,
        ``existLFNsBadReplicas`` and ``existLFNsBadFiles``.

        :return: the result structure returned by ``compareChecksum``; on
            failure the attributes are left untouched
        """
        res = self.compareChecksum(self.lfns)
        if not res["OK"]:
            gLogger.error("Failed to compare checksums", res["Message"])
            return res
        # compareChecksum wraps its dictionary in S_OK: it has to be unwrapped
        repDict = res["Value"]
        self.existLFNsNoSE = repDict["MissingReplica"]
        self.existLFNsNotExisting = repDict["MissingAllReplicas"]
        self.existLFNsBadReplicas = repDict["SomeReplicasCorrupted"]
        self.existLFNsBadFiles = repDict["AllReplicasCorrupted"]
        return res

    def getReplicasPresence(self, lfns):
        """Get the replicas using the standard FileCatalog.getReplicas().

        LFNs are queried in chunks of 100; each chunk is tried up to 9 times
        with a 0.1 second pause after a failure.  Successful replicas are also
        stored in ``self.cachedReplicas``.

        :param lfns: LFNs to look up
        :return: tuple (list of LFNs reported as successful,
            list of LFNs reported as failed)
        """
        present = set()
        notPresent = set()

        chunkSize = 100
        printProgress = len(lfns) > chunkSize
        startTime = time.time()
        self.__write(
            "Checking replicas for %d files%s"
            % (len(lfns), (" (chunks of %d)" % chunkSize) if printProgress else "... ")
        )
        for chunk in breakListIntoChunks(lfns, chunkSize):
            if printProgress:
                self.__write(".")
            for _ in range(1, 10):
                res = self.fileCatalog.getReplicas(chunk)
                if res["OK"]:
                    present.update(res["Value"]["Successful"])
                    self.cachedReplicas.update(res["Value"]["Successful"])
                    notPresent.update(res["Value"]["Failed"])
                    break
                time.sleep(0.1)
            else:
                # All attempts failed: the chunk ends up in neither list, say so
                gLogger.error("Failed to get replicas for a chunk of %d files" % len(chunk), res["Message"])
        self.__write(" (%.1f seconds)\n" % (time.time() - startTime))

        if notPresent:
            self.__logVerbose("Files without replicas:", "\n".join([""] + sorted(notPresent)))
        return list(present), list(notPresent)

    ##########################################################################

    def getReplicasPresenceFromDirectoryScan(self, lfns):
        """Get replicas scanning the directories. Might be faster.

        When at least one entry of ``lfns`` is a directory name (it ends with
        "/"), no comparison is made and all files found in the scanned
        directories are reported as present.

        :param lfns: LFNs, or directory names ending with "/"
        :return: tuple (present LFNs, not present LFNs)
        """
        dirs = {}
        present = []
        notPresent = []
        compare = True

        for lfn in lfns:
            dirN = os.path.dirname(lfn)
            if lfn == dirN + "/":
                # The entry is itself a directory
                compare = False
            dirs.setdefault(dirN, []).append(lfn)

        if compare:
            self.__write("Checking File Catalog for %d files from %d directories " % (len(lfns), len(dirs)))
        else:
            self.__write("Getting files from %d directories " % len(dirs))
        startTime = time.time()

        for dirN in sorted(dirs):
            startTime1 = time.time()
            self.__write(".")
            lfnsFound = self._getFilesFromDirectoryScan(dirN)
            gLogger.verbose("Obtained %d files in %.1f seconds" % (len(lfnsFound), time.time() - startTime1))
            if compare:
                pr, notPr = self.__compareLFNLists(dirs[dirN], lfnsFound)
                notPresent += notPr
                present += pr
            else:
                present += lfnsFound

        self.__write(" (%.1f seconds)\n" % (time.time() - startTime))
        gLogger.info("Found %d files with replicas and %d without" % (len(present), len(notPresent)))
        return present, notPresent

    ##########################################################################

    def __compareLFNLists(self, lfns, lfnsFound):
        """Return files in both lists and files in lfns and not in lfnsFound.

        :param lfns: reference list of LFNs
        :param lfnsFound: list of LFNs that were found
        :return: tuple (present, notPresent); when ``lfnsFound`` is empty,
            ``notPresent`` is the ``lfns`` object itself
        """
        present = []
        notPresent = lfns
        startTime = time.time()
        self.__logVerbose("Comparing list of %d LFNs with second list of %d" % (len(lfns), len(lfnsFound)))
        if lfnsFound:
            setLfns = set(lfns)
            setLfnsFound = set(lfnsFound)
            present = list(setLfns & setLfnsFound)
            notPresent = list(setLfns - setLfnsFound)
        self.__logVerbose("End of comparison: %.1f seconds" % (time.time() - startTime))
        return present, notPresent

    def _getFilesFromDirectoryScan(self, dirs):
        """Call dm.getFilesFromDirectory with the logger silenced.

        :param dirs: directory (or directories) passed to
            ``DataManager.getFilesFromDirectory``
        :return: the files found, or an empty list on error
        """
        level = gLogger.getLevel()
        gLogger.setLevel("FATAL")
        try:
            res = self.dataManager.getFilesFromDirectory(dirs)
        finally:
            # Always restore the log level, even if the call raises
            gLogger.setLevel(level)
        if not res["OK"]:
            if "No such file or directory" not in res["Message"]:
                gLogger.error("Error getting files from directories %s:" % dirs, res["Message"])
            return []
        return res["Value"] or []

    ##########################################################################

    def _getTSFiles(self):
        """Helper function - get files from the TS.

        The selection uses ``self._lfns`` when set, otherwise the runs in
        ``self.runsList`` (possibly extended with the runs of ``self.fromProd``
        having status ``self.runStatus``).  ``DIRAC.exit(0)`` is called when a
        run selection was requested and no run is selected.

        :return: tuple (processed LFNs, non processed LFNs, list of the
            statuses of the non processed files); three empty lists on error
        """
        selectDict = {"TransformationID": self.prod}
        if self._lfns:
            selectDict["LFN"] = self._lfns
        elif self.runStatus and self.fromProd:
            res = self.transClient.getTransformationRuns(
                {"TransformationID": self.fromProd, "Status": self.runStatus}
            )
            if not res["OK"]:
                gLogger.error("Failed to get runs for transformation %d" % self.prod, res["Message"])
            else:
                if res["Value"]:
                    self.runsList.extend(
                        [run["RunNumber"] for run in res["Value"] if run["RunNumber"] not in self.runsList]
                    )
                    gLogger.notice("%d runs selected" % len(res["Value"]))
                elif not self.runsList:
                    gLogger.notice("No runs selected, check completed")
                    DIRAC.exit(0)
        if not self._lfns and self.runsList:
            selectDict["RunNumber"] = self.runsList

        res = self.transClient.getTransformation(self.prod)
        if not res["OK"]:
            gLogger.error("Failed to find transformation %s" % self.prod)
            return [], [], []
        status = res["Value"]["Status"]
        if status not in ("Active", "Stopped", "Completed", "Idle"):
            # Only a notice is issued here: the files are still retrieved below
            gLogger.notice(
                "Transformation %s in status %s, will not check if files are processed" % (self.prod, status)
            )
        processedLFNs = []
        nonProcessedLFNs = []
        nonProcessedStatuses = []
        if self._lfns:
            processedLFNs = self._lfns
        else:
            res = self.transClient.getTransformationFiles(selectDict)
            if not res["OK"]:
                gLogger.error("Failed to get files for transformation %d" % self.prod, res["Message"])
                return [], [], []
            processedLFNs = [item["LFN"] for item in res["Value"] if item["Status"] == "Processed"]
            nonProcessedLFNs = [item["LFN"] for item in res["Value"] if item["Status"] != "Processed"]
            nonProcessedStatuses = list(set(item["Status"] for item in res["Value"] if item["Status"] != "Processed"))

        return processedLFNs, nonProcessedLFNs, nonProcessedStatuses

    def __getDirectories(self):
        """Get the directories where to look into, taken from ``self.directories``.

        A directory ending with "..." is expanded to the sub-directories of its
        parent that start with the part preceding the "...".

        :return: list of directories, or an ``S_ERROR`` structure when no
            directory was specified or the catalog listing failed
        """
        if self.directories:
            directories = []
            printout = False
            for directory in self.directories:
                if not directory.endswith("..."):
                    directories.append(directory)
                else:
                    printout = True
                    topDir = os.path.dirname(directory)
                    res = self.fileCatalog.listDirectory(topDir)
                    if not res["OK"]:
                        # DError(errno.ENOENT, res['Message'] )
                        return S_ERROR(errno.ENOENT, res["Message"])
                    matchDir = directory.split("...")[0]
                    directories += [
                        d
                        for d in res["Value"]["Successful"].get(topDir, {}).get("SubDirs", [])
                        if d.startswith(matchDir)
                    ]
            if printout:
                gLogger.always("Expanded list of %d directories:\n%s" % (len(directories), "\n".join(directories)))
            return directories
        else:
            return S_ERROR(errno.ENOENT, "Need to specify the directories")

    ##########################################################################

    def __write(self, text):
        """Write ``text`` to stdout and flush it, only in interactive mode."""
        if self.interactive:
            sys.stdout.write(text)
            sys.stdout.flush()

    ##########################################################################

    def _selectByFileType(self, lfnDict, fileTypes=None, fileTypesExcluded=None):
        """Select only those files from the values of lfnDict that have a certain type.

        Neither ``lfnDict`` (nor its per-ancestor dictionaries) nor the
        ``fileTypesExcluded`` argument are modified.

        :param lfnDict: dictionary {ancestor: {descendant: metadata}} where
            each metadata dictionary has a "FileType" key
        :param fileTypes: file types to keep; ``self.fileType`` when empty.
            ``[""]`` means no restriction on the type
        :param fileTypesExcluded: file types to drop, in addition to
            ``self.fileTypesExcluded``
        :return: new dictionary with the same layout, without the rejected
            descendants and without the ancestors left with no descendant
        """
        if not lfnDict:
            return {}
        if not fileTypes:
            fileTypes = self.fileType
        if not fileTypesExcluded:
            fileTypesExcluded = self.fileTypesExcluded
        else:
            # Build a new list rather than extending the caller's one in place
            fileTypesExcluded = list(fileTypesExcluded) + [
                ft for ft in self.fileTypesExcluded if ft not in fileTypesExcluded
            ]
        # lfnDict is a dictionary of dictionaries including the metadata: copy
        # both levels so that popping entries does not alter the caller's data
        ancDict = {ancestor: dict(descendants) for ancestor, descendants in lfnDict.items()}
        if fileTypes == [""]:
            fileTypes = []
        # and loop on the original dictionaries
        for ancestor, descendants in lfnDict.items():
            for desc, descMetadata in descendants.items():
                ft = descMetadata["FileType"]
                if ft in fileTypesExcluded or (fileTypes and ft not in fileTypes):
                    ancDict[ancestor].pop(desc)
            if not ancDict[ancestor]:
                ancDict.pop(ancestor)
        return ancDict

    @staticmethod
    def _getFileTypesCount(lfnDict):
        """Return file types count.

        :param lfnDict: dictionary {ancestor: {descendant: metadata}} where
            each metadata dictionary has a "FileType" key
        :return: dictionary {ancestor: {file type: number of descendants}}
        """
        ft_dict = {}
        for ancestor in lfnDict:
            t_dict = {}
            for desc in lfnDict[ancestor]:
                ft = lfnDict[ancestor][desc]["FileType"]
                t_dict[ft] = t_dict.setdefault(ft, 0) + 1
            ft_dict[ancestor] = t_dict

        return ft_dict

    def __getLFNsFromFC(self):
        """Check if a list of LFNs is in the FC or not.

        Uses ``self.lfns`` when set, otherwise scans the directories returned
        by ``__getDirectories``.

        :return: tuple (present LFNs, not present LFNs); two empty lists when
            the directories could not be determined
        """
        if not self.lfns:
            res = self.__getDirectories()
            if isinstance(res, dict):
                # __getDirectories returned an S_ERROR structure, not a list:
                # iterating over it would scan its keys as if they were directories
                gLogger.error("Failed to get the directories to scan", res.get("Message", ""))
                return [], []
            directories = [dirName if dirName.endswith("/") else dirName + "/" for dirName in res]
            present, notPresent = self.getReplicasPresenceFromDirectoryScan(directories)
        else:
            present, notPresent = self.getReplicasPresence(self.lfns)
        return present, notPresent

    def compareChecksum(self, lfns):
        """Compare the checksum of the file in the FC and the checksum of the
        physical replicas.

        :param lfns: LFNs to check
        :return: ``S_OK`` holding a dictionary with 5 sub-dictionaries keyed by LFN:

            * ``NoReplicas``: files for which the catalog replica lookup failed
            * ``MissingAllReplicas``: no good replica and at least one replica
              not found at its SE
            * ``AllReplicasCorrupted``: no good replica, checksum mismatches only
            * ``MissingReplica``: at least one good replica and at least one
              replica not found at its SE
            * ``SomeReplicasCorrupted``: at least one good replica and at least
              one replica with a bad checksum

            or ``S_ERROR`` when a File Catalog query fails
        """
        retDict = {
            "AllReplicasCorrupted": {},
            "SomeReplicasCorrupted": {},
            "MissingReplica": {},
            "MissingAllReplicas": {},
            "NoReplicas": {},
        }

        chunkSize = 100
        replicas = {}
        setLfns = set(lfns)
        # Reuse the replicas already obtained by getReplicasPresence
        cachedLfns = setLfns & set(self.cachedReplicas)
        for lfn in cachedLfns:
            replicas[lfn] = self.cachedReplicas[lfn]
        lfnsLeft = list(setLfns - cachedLfns)
        if lfnsLeft:
            self.__write("Get replicas for %d files (chunks of %d): " % (len(lfnsLeft), chunkSize))
            for lfnChunk in breakListIntoChunks(lfnsLeft, chunkSize):
                self.__write(".")
                replicasRes = self.fileCatalog.getReplicas(lfnChunk)
                if not replicasRes["OK"]:
                    gLogger.error("error: %s" % replicasRes["Message"])
                    return S_ERROR(errno.ENOENT, "error: %s" % replicasRes["Message"])
                replicasRes = replicasRes["Value"]
                if replicasRes["Failed"]:
                    retDict["NoReplicas"].update(replicasRes["Failed"])
                replicas.update(replicasRes["Successful"])

        self.__write("Get FC metadata for %d files to be checked: " % len(lfns))
        metadata = {}
        for lfnChunk in breakListIntoChunks(replicas, chunkSize):
            self.__write(".")
            res = self.fileCatalog.getFileMetadata(lfnChunk)
            if not res["OK"]:
                return S_ERROR(errno.ENOENT, "error %s" % res["Message"])
            metadata.update(res["Value"]["Successful"])

        gLogger.notice("Check existence and compare checksum file by file...")
        csDict = {}
        seFiles = {}
        # Reverse the LFN->SE dictionary
        nReps = 0
        for lfn in replicas:
            csDict.setdefault(lfn, {})["FCChecksum"] = metadata.get(lfn, {}).get("Checksum")
            for se in replicas[lfn]:
                seFiles.setdefault(se, []).append(lfn)
                nReps += 1

        gLogger.notice("Getting checksum of %d replicas in %d SEs" % (nReps, len(seFiles)))
        checkSum = {}
        lfnNotExisting = {}
        lfnNoInfo = {}
        logLevel = gLogger.getLevel()
        gLogger.setLevel("FATAL")
        try:
            for num, se in enumerate(sorted(seFiles)):
                self.__write("\n%d. At %s (%d files): " % (num, se, len(seFiles[se])))
                oSe = StorageElement(se)
                notFound = 0
                for surlChunk in breakListIntoChunks(seFiles[se], chunkSize):
                    self.__write(".")
                    seMetadata = oSe.getFileMetadata(surlChunk)
                    if not seMetadata["OK"]:
                        gLogger.error(
                            "Error: getFileMetadata returns %s. Ignore those replicas" % (seMetadata["Message"])
                        )
                        # Remove from list of replicas as we don't know whether it is OK or
                        # not. Only the files of the failed chunk are concerned: the other
                        # chunks of this SE may well have been answered.
                        for lfn in surlChunk:
                            lfnNoInfo.setdefault(lfn, []).append(se)
                    else:
                        seMetadata = seMetadata["Value"]
                        notFound += len(seMetadata["Failed"])
                        for lfn in seMetadata["Failed"]:
                            lfnNotExisting.setdefault(lfn, []).append(se)
                        for lfn in seMetadata["Successful"]:
                            checkSum.setdefault(lfn, {})[se] = seMetadata["Successful"][lfn]["Checksum"]
                if notFound:
                    gLogger.error("%d files not found" % notFound)
        finally:
            # Always restore the log level, even if an SE call raises
            gLogger.setLevel(logLevel)

        gLogger.notice("Verifying checksum of %d files" % len(replicas))
        for lfn in replicas:
            # get the lfn checksum from the FC
            replicaDict = replicas[lfn]
            oneGoodReplica = False
            allGoodReplicas = True
            fcChecksum = csDict[lfn].pop("FCChecksum")
            for se in replicaDict:
                # If replica doesn't exist skip check
                if se in lfnNotExisting.get(lfn, []):
                    allGoodReplicas = False
                    continue
                if se in lfnNoInfo.get(lfn, []):
                    # If there is no info, a priori it could be good
                    oneGoodReplica = True
                    continue
                # get the surls metadata and compare the checksum
                surlChecksum = checkSum.get(lfn, {}).get(se, "")
                # compareAdler is defined elsewhere; presumably it tells whether
                # the two Adler checksums match - TODO confirm
                if not surlChecksum or not compareAdler(fcChecksum, surlChecksum):
                    # if fcChecksum does not match surlChecksum
                    csDict[lfn][se] = {"PFNChecksum": surlChecksum}
                    gLogger.info(
                        "ERROR!! checksum mismatch at %s for LFN %s: FC checksum: %s , PFN checksum : %s "
                        % (se, lfn, fcChecksum, surlChecksum)
                    )
                    allGoodReplicas = False
                else:
                    oneGoodReplica = True
            if not oneGoodReplica:
                if lfn in lfnNotExisting:
                    gLogger.info("=> All replicas are missing", lfn)
                    retDict["MissingAllReplicas"][lfn] = "All"
                else:
                    gLogger.info("=> All replicas have bad checksum", lfn)
                    retDict["AllReplicasCorrupted"][lfn] = csDict[lfn]
            elif not allGoodReplicas:
                if lfn in lfnNotExisting:
                    gLogger.info("=> At least one replica missing", lfn)
                    retDict["MissingReplica"][lfn] = lfnNotExisting[lfn]
                else:
                    gLogger.info("=> At least one replica with good Checksum", lfn)
                    retDict["SomeReplicasCorrupted"][lfn] = csDict[lfn]

        return S_OK(retDict)

    ##########################################################################
    # properties

    def set_prod(self, value):
        """Setter: a true value is converted to int and looked up in the
        Transformation System to set ``self.transType``; any other value
        resets the production to 0.

        :raises Exception: when the transformation cannot be retrieved
        """
        if value:
            value = int(value)
            res = self.transClient.getTransformation(value, extraParams=False)
            if not res["OK"]:
                raise Exception("Couldn't find transformation %d: %s" % (value, res["Message"]))
            else:
                self.transType = res["Value"]["Type"]
            if self.interactive:
                gLogger.info("Production %d has type %s" % (value, self.transType))
        else:
            value = 0
        self._prod = value

    def get_prod(self):
        """Getter"""
        return self._prod

    prod = property(get_prod, set_prod)

    def set_fileType(self, value):
        """Setter: the file types are stored in upper case"""
        self._fileType = [ft.upper() for ft in value]

    def get_fileType(self):
        """Getter"""
        return self._fileType

    fileType = property(get_fileType, set_fileType)

    def set_fileTypesExcluded(self, value):
        """Setter: the excluded file types are stored in upper case"""
        self._fileTypesExcluded = [ft.upper() for ft in value]

    def get_fileTypesExcluded(self):
        """Getter"""
        return self._fileTypesExcluded

    fileTypesExcluded = property(get_fileTypesExcluded, set_fileTypesExcluded)

    def set_lfns(self, value):
        """Setter: accepts a single LFN or a list of LFNs; spaces are removed
        and "//" is replaced by "/" in each of them"""
        if isinstance(value, six.string_types):
            value = [value]
        value = [v.replace(" ", "").replace("//", "/") for v in value]
        self._lfns = value

    def get_lfns(self):
        """Getter"""
        return self._lfns

    lfns = property(get_lfns, set_lfns)

    ##########################################################################
    #
    #  This part was backported from DataIntegrityClient
    #
    #
    #  This section contains the specific methods for File Catalog->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """This obtains the replica and metadata information from the catalog
        for the supplied directory and checks against the storage elements.

        :param lfnDir: a directory or a list of directories
        :return: ``S_OK`` with a dictionary holding "CatalogMetadata" and
            "CatalogReplicas", or the failing result structure
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->SE check")
        gLogger.info("-" * 40)
        if isinstance(lfnDir, six.string_types):
            lfnDir = [lfnDir]
        res = self._getCatalogDirectoryContents(lfnDir)
        if not res["OK"]:
            return res
        replicas = res["Value"]["Replicas"]
        catalogMetadata = res["Value"]["Metadata"]
        res = self.checkPhysicalFiles(replicas, catalogMetadata)
        if not res["OK"]:
            return res
        resDict = {"CatalogMetadata": catalogMetadata, "CatalogReplicas": replicas}
        return S_OK(resDict)

    def catalogFileToSE(self, lfns):
        """This obtains the replica and metadata information from the catalog
        and checks against the storage elements.

        :param lfns: an LFN or a list of LFNs
        :return: ``S_OK`` with a dictionary holding "CatalogMetadata" and
            "CatalogReplicas", or the failing result structure
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->SE check")
        gLogger.info("-" * 40)
        if isinstance(lfns, six.string_types):
            lfns = [lfns]
        res = self._getCatalogMetadata(lfns)
        if not res["OK"]:
            return res
        catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res["Value"]
        res = self._getCatalogReplicas(list(catalogMetadata))
        if not res["OK"]:
            return res
        replicas, _zeroReplicaFiles = res["Value"]
        res = self.checkPhysicalFiles(replicas, catalogMetadata)
        if not res["OK"]:
            return res
        resDict = {"CatalogMetadata": catalogMetadata, "CatalogReplicas": replicas}
        return S_OK(resDict)

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
        """This method takes the supplied replica and metadata information
        obtained from the catalog and checks against the storage elements.

        Replicas whose size at the SE differs from the catalog size are
        reported through ``self.dic.reportProblematicReplicas``.

        :param replicas: dictionary {lfn: {se: url}}
        :param catalogMetadata: dictionary {lfn: metadata} with a "Size" key
        :param ses: when given, only these storage elements are checked
        :return: ``S_OK()`` or the failing result structure
        """
        # FIXME: we better use the compareChecksum function instead of this one!
        # or maybe directly checkFC2SE

        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->SE check")
        gLogger.info("-" * 40)
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, _url in replicaDict.items():
                if (ses) and (se not in ses):
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info("%s %s" % ("Storage Element".ljust(20), "Replicas".rjust(20)))

        for se in sorted(seLfns):
            files = len(seLfns[se])
            gLogger.info("%s %s" % (se.ljust(20), str(files).rjust(20)))

            lfns = seLfns[se]
            sizeMismatch = []
            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res["OK"]:
                gLogger.error("Failed to get physical file metadata.", res["Message"])
                return res
            for lfn, metadata in res["Value"].items():
                if lfn in catalogMetadata:
                    # and ( metadata['Size'] != 0 ):
                    if metadata["Size"] != catalogMetadata[lfn]["Size"]:
                        sizeMismatch.append((lfn, "deprecatedUrl", se, "CatalogPFNSizeMismatch"))
            if sizeMismatch:
                self.dic.reportProblematicReplicas(sizeMismatch, se, "CatalogPFNSizeMismatch")
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """Obtain the physical file metadata and check the files are available.

        Missing, lost, unavailable and zero size replicas are reported through
        ``self.dic.reportProblematicReplicas``.

        :param lfns: LFNs to check
        :param se: name of the storage element
        :return: ``S_OK`` with the metadata of the files found at the SE, or
            the failing result structure
        """
        gLogger.info("Checking the integrity of %s physical files at %s" % (len(lfns), se))

        res = StorageElement(se).getFileMetadata(lfns)

        if not res["OK"]:
            gLogger.error("Failed to get metadata for lfns.", res["Message"])
            return res
        pfnMetadata = res["Value"]["Successful"]
        # If the replicas are completely missing
        missingReplicas = []
        for lfn, reason in res["Value"]["Failed"].items():
            if re.search("File does not exist", reason):
                missingReplicas.append((lfn, "deprecatedUrl", se, "PFNMissing"))
        if missingReplicas:
            self.dic.reportProblematicReplicas(missingReplicas, se, "PFNMissing")
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, metadata in pfnMetadata.items():
            if metadata.get("Lost"):
                lostReplicas.append((lfn, "deprecatedUrl", se, "PFNLost"))
            if metadata.get("Unavailable") or not metadata["Accessible"]:
                unavailableReplicas.append((lfn, "deprecatedUrl", se, "PFNUnavailable"))
            if not metadata["Size"]:
                zeroSizeReplicas.append((lfn, "deprecatedUrl", se, "PFNZeroSize"))
        if lostReplicas:
            self.dic.reportProblematicReplicas(lostReplicas, se, "PFNLost")
        if unavailableReplicas:
            self.dic.reportProblematicReplicas(unavailableReplicas, se, "PFNUnavailable")
        if zeroSizeReplicas:
            self.dic.reportProblematicReplicas(zeroSizeReplicas, se, "PFNZeroSize")
        gLogger.info("Checking the integrity of physical files at %s complete" % se)
        return S_OK(pfnMetadata)

    ##########################################################################
    #
    # This section contains the specific methods for SE->File Catalog checks
    #

    def _getCatalogDirectoryContents(self, lfnDirs):
        """Obtain the contents of the supplied directory, recursively.

        :param lfnDirs: list of directories
        :return: ``S_OK`` with a dictionary holding "Metadata" (the files
            found, as returned by the catalog listing) and "Replicas", or an
            error structure when a listing or the replica lookup fails
        """

        def _getDirectoryContent(directory):
            """Inner function: recursively scan a directory, returns S_OK with
            the files found (as listed by the catalog) or an error structure
            """
            filesInDirectory = {}

            gLogger.debug("Examining %s" % directory)

            res = self.fileCatalog.listDirectory(directory)
            if not res["OK"]:
                gLogger.error("Failed to get directory contents", res["Message"])
                return res
            if directory in res["Value"]["Failed"]:
                gLogger.error(
                    "Failed to get directory content", "%s %s" % (directory, res["Value"]["Failed"][directory])
                )
                return S_ERROR("Failed to get directory content")
            if directory not in res["Value"]["Successful"]:
                return S_ERROR("Directory not existing?")

            # first, adding the files found in the current directory
            gLogger.debug("Files in %s: %d" % (directory, len(res["Value"]["Successful"][directory]["Files"])))
            filesInDirectory.update(res["Value"]["Successful"][directory]["Files"])

            # then, looking for subDirectories content
            if res["Value"]["Successful"][directory]["SubDirs"]:
                for l_dir in res["Value"]["Successful"][directory]["SubDirs"]:
                    # recursion here
                    subDirContent = _getDirectoryContent(l_dir)
                    if not subDirContent["OK"]:
                        return subDirContent
                    else:
                        filesInDirectory.update(subDirContent["Value"])

            return S_OK(filesInDirectory)

        gLogger.info("Obtaining the catalog contents for %d directories" % len(lfnDirs))

        allFiles = {}
        for lfnDir in lfnDirs:
            dirContent = _getDirectoryContent(lfnDir)
            if not dirContent["OK"]:
                return dirContent
            else:
                gLogger.debug("Content of directory %s: %d files" % (lfnDir, len(dirContent["Value"])))
                allFiles.update(dirContent["Value"])

        gLogger.debug("Content of directories examined: %d files" % len(allFiles))

        replicas = self.fileCatalog.getReplicas(list(allFiles))
        if not replicas["OK"]:
            return replicas
        if replicas["Value"]["Failed"]:
            return S_ERROR("Failures in replicas discovery")

        return S_OK({"Metadata": allFiles, "Replicas": replicas["Value"]["Successful"]})

    def _getCatalogReplicas(self, lfns):
        """Obtain the file replicas from the catalog while checking that there
        are replicas.

        :param lfns: LFNs to look up
        :return: ``S_OK`` with the tuple (dictionary of replicas per LFN,
            list of LFNs having zero replicas), or the failing result structure
        """
        if not lfns:
            # Same types as in the non-empty case: callers use the first
            # element as a dictionary (checkPhysicalFiles calls .items() on it)
            return S_OK(({}, []))

        gLogger.info("Obtaining the replicas for %s files" % len(lfns))
        zeroReplicaFiles = []
        res = self.fileCatalog.getReplicas(lfns, allStatus=True)
        if not res["OK"]:
            gLogger.error("Failed to get catalog replicas", res["Message"])
            return res
        allReplicas = res["Value"]["Successful"]
        for lfn, error in res["Value"]["Failed"].items():
            if re.search("File has zero replicas", error):
                zeroReplicaFiles.append(lfn)
        gLogger.info("Obtaining the replicas for files complete")
        return S_OK((allReplicas, zeroReplicaFiles))

    def _getCatalogMetadata(self, lfns):
        """Obtain the file metadata from the catalog while checking they exist.

        :param lfns: LFNs to look up
        :return: ``S_OK`` with the tuple (dictionary of metadata per LFN,
            list of LFNs missing from the catalog, list of zero size files -
            never filled here), or the failing result structure
        """
        # A dictionary, as in the non-empty case
        allMetadata = {}
        missingCatalogFiles = []
        zeroSizeFiles = []

        if not lfns:
            return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))

        gLogger.info("Obtaining the catalog metadata for %s files" % len(lfns))

        res = self.fileCatalog.getFileMetadata(lfns)
        if not res["OK"]:
            gLogger.error("Failed to get catalog metadata", res["Message"])
            return res
        allMetadata = res["Value"]["Successful"]
        for lfn, error in res["Value"]["Failed"].items():
            if re.search("No such file or directory", error):
                missingCatalogFiles.append(lfn)
        gLogger.info("Obtaining the catalog metadata complete")
        return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))
class ConsistencyInspector(object): """ A class for handling some consistency checks """ def __init__(self, interactive=True, transClient=None, dm=None, fc=None, dic=None): """ c'tor interactive: Data Manager (True) or DIRAC Agente (False) transClient: TransformationClient() if None, else transClient params dm: DataManager() if None, else dm params fc: FileCatalog() if None, else fc params One object for every production/directoriesList... """ self.interactive = interactive self.transClient = TransformationClient() if transClient is None else transClient self.dataManager = dm if dm else DataManager() self.fileCatalog = fc if fc else FileCatalog() self.dic = dic if dic else DataIntegrityClient() self.dirac = Dirac() # Base elements from which to start the consistency checks self._prod = 0 self._bkQuery = None self._fileType = [] self._fileTypesExcluded = [] self._lfns = [] self.noLFC = False self.directories = [] # Accessory elements self.runsList = [] self.runStatus = None self.fromProd = None self.transType = '' self.cachedReplicas = {} self.prcdWithDesc = [] self.prcdWithoutDesc = [] self.prcdWithMultDesc = [] self.nonPrcdWithDesc = [] self.nonPrcdWithoutDesc = [] self.nonPrcdWithMultDesc = [] self.descForPrcdLFNs = [] self.descForNonPrcdLFNs = [] self.removedFiles = [] self.absentLFNsInFC = [] self.existLFNsNoSE = {} self.existLFNsBadReplicas = {} self.existLFNsBadFiles = {} self.existLFNsNotExisting = {} self.commonAncestors = {} self.multipleDescendants = {} self.ancestors = {} self._verbose = False def __logVerbose(self, msg, msg1=''): """ logger helper for verbose information """ if self._verbose: newMsg = '[ConsistencyChecks] ' + ('[%s] ' % str(self.prod)) if self.prod else '' # Add that prefix to all lines of the message newMsg1 = msg1.replace('\n', '\n' + newMsg) newMsg += msg.replace('\n', '\n' + newMsg) gLogger.notice(newMsg, newMsg1) else: gLogger.verbose(msg, msg1) ########################################################################## def 
checkFC2SE(self): """ check files vs SE information """ repDict = self.compareChecksum(self.lfns) self.existLFNsNoSE = repDict['MissingReplica'] self.existLFNsNotExisting = repDict['MissingAllReplicas'] self.existLFNsBadReplicas = repDict['SomeReplicasCorrupted'] self.existLFNsBadFiles = repDict['AllReplicasCorrupted'] def getReplicasPresence(self, lfns): """ get the replicas using the standard FileCatalog.getReplicas() """ present = set() notPresent = set() chunkSize = 100 printProgress = (len(lfns) > chunkSize) startTime = time.time() self.__write("Checking replicas for %d files%s" % (len(lfns), (' (chunks of %d)' % chunkSize) if printProgress else '... ')) for chunk in breakListIntoChunks(lfns, chunkSize): if printProgress: self.__write('.') for _ in xrange(1, 10): res = self.fileCatalog.getReplicas(chunk) if res['OK']: present.update(res['Value']['Successful']) self.cachedReplicas.update(res['Value']['Successful']) notPresent.update(res['Value']['Failed']) break else: time.sleep(0.1) self.__write(' (%.1f seconds)\n' % (time.time() - startTime)) if notPresent: self.__logVerbose("Files without replicas:", '\n'.join([''] + sorted(notPresent))) return list(present), list(notPresent) ########################################################################## def getReplicasPresenceFromDirectoryScan(self, lfns): """ Get replicas scanning the directories. Might be faster. 
""" dirs = {} present = [] notPresent = [] compare = True for lfn in lfns: dirN = os.path.dirname(lfn) if lfn == dirN + '/': compare = False dirs.setdefault(dirN, []).append(lfn) if compare: self.__write("Checking File Catalog for %d files from %d directories " % ( len(lfns), len(dirs))) else: self.__write("Getting files from %d directories " % len(dirs)) startTime = time.time() for dirN in sorted(dirs): startTime1 = time.time() self.__write('.') lfnsFound = self._getFilesFromDirectoryScan(dirN) gLogger.verbose("Obtained %d files in %.1f seconds" % (len(lfnsFound), time.time() - startTime1)) if compare: pr, notPr = self.__compareLFNLists(dirs[dirN], lfnsFound) notPresent += notPr present += pr else: present += lfnsFound self.__write(' (%.1f seconds)\n' % (time.time() - startTime)) gLogger.info("Found %d files with replicas and %d without" % (len(present), len(notPresent))) return present, notPresent ########################################################################## def __compareLFNLists(self, lfns, lfnsFound): """ return files in both lists and files in lfns and not in lfnsFound """ present = [] notPresent = lfns startTime = time.time() self.__logVerbose("Comparing list of %d LFNs with second list of %d" % ( len(lfns), len(lfnsFound))) if lfnsFound: setLfns = set(lfns) setLfnsFound = set(lfnsFound) present = list(setLfns & setLfnsFound) notPresent = list(setLfns - setLfnsFound) self.__logVerbose("End of comparison: %.1f seconds" % (time.time() - startTime)) return present, notPresent def _getFilesFromDirectoryScan(self, dirs): """ calls dm.getFilesFromDirectory """ level = gLogger.getLevel() gLogger.setLevel('FATAL') res = self.dataManager.getFilesFromDirectory(dirs) gLogger.setLevel(level) if not res['OK']: if 'No such file or directory' not in res['Message']: gLogger.error("Error getting files from directories %s:" % dirs, res['Message']) return [] if res['Value']: lfnsFound = res['Value'] else: lfnsFound = [] return lfnsFound 
########################################################################## def _getTSFiles(self): """ Helper function - get files from the TS """ selectDict = {'TransformationID': self.prod} if self._lfns: selectDict['LFN'] = self._lfns elif self.runStatus and self.fromProd: res = self.transClient.getTransformationRuns( {'TransformationID': self.fromProd, 'Status': self.runStatus}) if not res['OK']: gLogger.error("Failed to get runs for transformation %d" % self.prod) else: if res['Value']: self.runsList.extend( [run['RunNumber'] for run in res['Value'] if run['RunNumber'] not in self.runsList]) gLogger.notice("%d runs selected" % len(res['Value'])) elif not self.runsList: gLogger.notice("No runs selected, check completed") DIRAC.exit(0) if not self._lfns and self.runsList: selectDict['RunNumber'] = self.runsList res = self.transClient.getTransformation(self.prod) if not res['OK']: gLogger.error("Failed to find transformation %s" % self.prod) return [], [], [] status = res['Value']['Status'] if status not in ('Active', 'Stopped', 'Completed', 'Idle'): gLogger.notice("Transformation %s in status %s, will not check if files are processed" % ( self.prod, status)) processedLFNs = [] nonProcessedLFNs = [] nonProcessedStatuses = [] if self._lfns: processedLFNs = self._lfns else: res = self.transClient.getTransformationFiles(selectDict) if not res['OK']: gLogger.error("Failed to get files for transformation %d" % self.prod, res['Message']) return [], [], [] else: processedLFNs = [item['LFN'] for item in res['Value'] if item['Status'] == 'Processed'] nonProcessedLFNs = [item['LFN'] for item in res['Value'] if item['Status'] != 'Processed'] nonProcessedStatuses = list( set(item['Status'] for item in res['Value'] if item['Status'] != 'Processed')) return processedLFNs, nonProcessedLFNs, nonProcessedStatuses def __getDirectories(self): """ get the directories where to look into (they are either given, or taken from the transformation ID """ if self.directories: directories = 
[] printout = False for directory in self.directories: if not directory.endswith('...'): directories.append(directory) else: printout = True topDir = os.path.dirname(directory) res = self.fileCatalog.listDirectory(topDir) if not res['OK']: # DError(errno.ENOENT, res['Message'] ) return S_ERROR(errno.ENOENT, res['Message']) else: matchDir = directory.split('...')[0] directories += [d for d in res['Value']['Successful'].get(topDir, {}).get('SubDirs', []) if d.startswith(matchDir)] if printout: gLogger.always('Expanded list of %d directories:\n%s' % (len(directories), '\n'.join(directories))) return directories else: return S_ERROR(errno.ENOENT, 'Need to specify the directories') ########################################################################## def __write(self, text): if self.interactive: sys.stdout.write(text) sys.stdout.flush() ########################################################################## def _selectByFileType(self, lfnDict, fileTypes=None, fileTypesExcluded=None): """ Select only those files from the values of lfnDict that have a certain type """ if not lfnDict: return {} if not fileTypes: fileTypes = self.fileType if not fileTypesExcluded: fileTypesExcluded = self.fileTypesExcluded else: fileTypesExcluded += [ ft for ft in self.fileTypesExcluded if ft not in fileTypesExcluded] # lfnDict is a dictionary of dictionaries including the metadata, create a # deep copy to get modified ancDict = dict(lfnDict) if fileTypes == ['']: fileTypes = [] # and loop on the original dictionaries for ancestor in lfnDict: for desc in lfnDict[ancestor].keys(): ft = lfnDict[ancestor][desc]['FileType'] if ft in fileTypesExcluded or (fileTypes and ft not in fileTypes): ancDict[ancestor].pop(desc) if not len(ancDict[ancestor]): ancDict.pop(ancestor) return ancDict @staticmethod def _getFileTypesCount(lfnDict): """ return file types count """ ft_dict = {} for ancestor in lfnDict: t_dict = {} for desc in lfnDict[ancestor]: ft = lfnDict[ancestor][desc]['FileType'] 
t_dict[ft] = t_dict.setdefault(ft, 0) + 1 ft_dict[ancestor] = t_dict return ft_dict def __getLFNsFromFC(self): """ Check if a list of LFNs is in the FC or not """ if not self.lfns: directories = [] for dirName in self.__getDirectories(): if not dirName.endswith('/'): dirName += '/' directories.append(dirName) present, notPresent = self.getReplicasPresenceFromDirectoryScan( directories) else: present, notPresent = self.getReplicasPresence(self.lfns) return present, notPresent def compareChecksum(self, lfns): """compare the checksum of the file in the FC and the checksum of the physical replicas. Returns a dictionary containing 3 sub-dictionaries: one with files with missing PFN, one with files with all replicas corrupted, and one with files with some replicas corrupted and at least one good replica """ retDict = {'AllReplicasCorrupted': {}, 'SomeReplicasCorrupted': {}, 'MissingReplica': {}, 'MissingAllReplicas': {}, 'NoReplicas': {}} chunkSize = 100 replicas = {} setLfns = set(lfns) cachedLfns = setLfns & set(self.cachedReplicas) for lfn in cachedLfns: replicas[lfn] = self.cachedReplicas[lfn] lfnsLeft = list(setLfns - cachedLfns) if lfnsLeft: self.__write("Get replicas for %d files (chunks of %d): " % (len(lfnsLeft), chunkSize)) for lfnChunk in breakListIntoChunks(lfnsLeft, chunkSize): self.__write('.') replicasRes = self.fileCatalog.getReplicas(lfnChunk) if not replicasRes['OK']: gLogger.error("error: %s" % replicasRes['Message']) return S_ERROR(errno.ENOENT, "error: %s" % replicasRes['Message']) replicasRes = replicasRes['Value'] if replicasRes['Failed']: retDict['NoReplicas'].update(replicasRes['Failed']) replicas.update(replicasRes['Successful']) self.__write("Get FC metadata for %d files to be checked: " % len(lfns)) metadata = {} for lfnChunk in breakListIntoChunks(replicas, chunkSize): self.__write('.') res = self.fileCatalog.getFileMetadata(lfnChunk) if not res['OK']: return S_ERROR(errno.ENOENT, "error %s" % res['Message']) 
metadata.update(res['Value']['Successful']) gLogger.notice("Check existence and compare checksum file by file...") csDict = {} seFiles = {} # Reverse the LFN->SE dictionary nReps = 0 for lfn in replicas: csDict.setdefault(lfn, {})['LFCChecksum'] = metadata.get( lfn, {}).get('Checksum') for se in replicas[lfn]: seFiles.setdefault(se, []).append(lfn) nReps += 1 gLogger.notice('Getting checksum of %d replicas in %d SEs' % (nReps, len(seFiles))) checkSum = {} lfnNotExisting = {} lfnNoInfo = {} logLevel = gLogger.getLevel() gLogger.setLevel('FATAL') for num, se in enumerate(sorted(seFiles)): self.__write('\n%d. At %s (%d files): ' % (num, se, len(seFiles[se]))) oSe = StorageElement(se) notFound = 0 for surlChunk in breakListIntoChunks(seFiles[se], chunkSize): self.__write('.') metadata = oSe.getFileMetadata(surlChunk) if not metadata['OK']: gLogger.error("Error: getFileMetadata returns %s. Ignore those replicas" % ( metadata['Message'])) # Remove from list of replicas as we don't know whether it is OK or # not for lfn in seFiles[se]: lfnNoInfo.setdefault(lfn, []).append(se) else: metadata = metadata['Value'] notFound += len(metadata['Failed']) for lfn in metadata['Failed']: lfnNotExisting.setdefault(lfn, []).append(se) for lfn in metadata['Successful']: checkSum.setdefault( lfn, {})[se] = metadata['Successful'][lfn]['Checksum'] if notFound: gLogger.error('%d files not found' % notFound) gLogger.setLevel(logLevel) gLogger.notice('Verifying checksum of %d files' % len(replicas)) for lfn in replicas: # get the lfn checksum from the FC replicaDict = replicas[lfn] oneGoodReplica = False allGoodReplicas = True lfcChecksum = csDict[lfn].pop('LFCChecksum') for se in replicaDict: # If replica doesn't exist skip check if se in lfnNotExisting.get(lfn, []): allGoodReplicas = False continue if se in lfnNoInfo.get(lfn, []): # If there is no info, a priori it could be good oneGoodReplica = True continue # get the surls metadata and compare the checksum surlChecksum = checkSum.get(lfn, 
{}).get(se, '') if not surlChecksum or not compareAdler(lfcChecksum, surlChecksum): # if lfcChecksum does not match surlChecksum csDict[lfn][se] = {'PFNChecksum': surlChecksum} gLogger.info("ERROR!! checksum mismatch at %s for LFN %s: LFC checksum: %s , PFN checksum : %s " % (se, lfn, lfcChecksum, surlChecksum)) allGoodReplicas = False else: oneGoodReplica = True if not oneGoodReplica: if lfn in lfnNotExisting: gLogger.info("=> All replicas are missing", lfn) retDict['MissingAllReplicas'][lfn] = 'All' else: gLogger.info("=> All replicas have bad checksum", lfn) retDict['AllReplicasCorrupted'][lfn] = csDict[lfn] elif not allGoodReplicas: if lfn in lfnNotExisting: gLogger.info("=> At least one replica missing", lfn) retDict['MissingReplica'][lfn] = lfnNotExisting[lfn] else: gLogger.info("=> At least one replica with good Checksum", lfn) retDict['SomeReplicasCorrupted'][lfn] = csDict[lfn] return S_OK(retDict) ########################################################################## # properties def set_prod(self, value): """ Setter """ if value: value = int(value) res = self.transClient.getTransformation(value, extraParams=False) if not res['OK']: S_ERROR(errno.ENOENT, "Couldn't find transformation %d: %s" % (value, res['Message'])) else: self.transType = res['Value']['Type'] if self.interactive: gLogger.info("Production %d has type %s" % (value, self.transType)) else: value = 0 self._prod = value def get_prod(self): """ Getter """ return self._prod prod = property(get_prod, set_prod) def set_fileType(self, value): """ Setter """ self._fileType = [ft.upper() for ft in value] def get_fileType(self): """ Getter """ return self._fileType fileType = property(get_fileType, set_fileType) def set_fileTypesExcluded(self, value): """ Setter """ self._fileTypesExcluded = [ft.upper() for ft in value] def get_fileTypesExcluded(self): """ Getter """ return self._fileTypesExcluded fileTypesExcluded = property(get_fileTypesExcluded, set_fileTypesExcluded) def set_lfns(self, value): 
""" Setter """ if isinstance(value, basestring): value = [value] value = [v.replace(' ', '').replace('//', '/') for v in value] self._lfns = value def get_lfns(self): """ Getter """ return self._lfns lfns = property(get_lfns, set_lfns) ########################################################################## # # This part was backported from DataIntegrityClient # # # This section contains the specific methods for File Catalog->SE checks # def catalogDirectoryToSE(self, lfnDir): """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements. """ gLogger.info("-" * 40) gLogger.info("Performing the FC->SE check") gLogger.info("-" * 40) if isinstance(lfnDir, basestring): lfnDir = [lfnDir] res = self._getCatalogDirectoryContents(lfnDir) if not res['OK']: return res replicas = res['Value']['Replicas'] catalogMetadata = res['Value']['Metadata'] res = self.checkPhysicalFiles(replicas, catalogMetadata) if not res['OK']: return res resDict = {'CatalogMetadata': catalogMetadata, 'CatalogReplicas': replicas} return S_OK(resDict) def catalogFileToSE(self, lfns): """ This obtains the replica and metadata information from the catalog and checks against the storage elements. 
""" gLogger.info("-" * 40) gLogger.info("Performing the FC->SE check") gLogger.info("-" * 40) if isinstance(lfns, basestring): lfns = [lfns] res = self._getCatalogMetadata(lfns) if not res['OK']: return res catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res['Value'] res = self._getCatalogReplicas(catalogMetadata.keys()) if not res['OK']: return res replicas, _zeroReplicaFiles = res['Value'] res = self.checkPhysicalFiles(replicas, catalogMetadata) if not res['OK']: return res resDict = {'CatalogMetadata': catalogMetadata, 'CatalogReplicas': replicas} return S_OK(resDict) def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None): """ This method takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements. """ # FIXME: we better use the compareChecksum function instead of this one! # or maybe directly checkFC2SE gLogger.info("-" * 40) gLogger.info("Performing the LFC->SE check") gLogger.info("-" * 40) seLfns = {} for lfn, replicaDict in replicas.iteritems(): for se, _url in replicaDict.iteritems(): if (ses) and (se not in ses): continue seLfns.setdefault(se, []).append(lfn) gLogger.info('%s %s' % ('Storage Element'.ljust(20), 'Replicas'.rjust(20))) for se in sorted(seLfns): files = len(seLfns[se]) gLogger.info('%s %s' % (se.ljust(20), str(files).rjust(20))) lfns = seLfns[se] sizeMismatch = [] res = self.__checkPhysicalFileMetadata(lfns, se) if not res['OK']: gLogger.error('Failed to get physical file metadata.', res['Message']) return res for lfn, metadata in res['Value'].iteritems(): if lfn in catalogMetadata: # and ( metadata['Size'] != 0 ): if metadata['Size'] != catalogMetadata[lfn]['Size']: sizeMismatch.append( (lfn, 'deprecatedUrl', se, 'CatalogPFNSizeMismatch')) if sizeMismatch: self.dic.reportProblematicReplicas( sizeMismatch, se, 'CatalogPFNSizeMismatch') return S_OK() def __checkPhysicalFileMetadata(self, lfns, se): """ Check obtain the physical file metadata and check the files are 
available """ gLogger.info('Checking the integrity of %s physical files at %s' % (len(lfns), se)) res = StorageElement(se).getFileMetadata(lfns) if not res['OK']: gLogger.error('Failed to get metadata for lfns.', res['Message']) return res pfnMetadata = res['Value']['Successful'] # If the replicas are completely missing missingReplicas = [] for lfn, reason in res['Value']['Failed'].iteritems(): if re.search('File does not exist', reason): missingReplicas.append((lfn, 'deprecatedUrl', se, 'PFNMissing')) if missingReplicas: self.dic.reportProblematicReplicas(missingReplicas, se, 'PFNMissing') lostReplicas = [] unavailableReplicas = [] zeroSizeReplicas = [] # If the files are not accessible for lfn, metadata in pfnMetadata.iteritems(): if metadata.get('Lost'): lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost')) if metadata.get('Unavailable') or not metadata['Accessible']: unavailableReplicas.append( (lfn, 'deprecatedUrl', se, 'PFNUnavailable')) if not metadata['Size']: zeroSizeReplicas.append((lfn, 'deprecatedUrl', se, 'PFNZeroSize')) if lostReplicas: self.dic.reportProblematicReplicas(lostReplicas, se, 'PFNLost') if unavailableReplicas: self.dic.reportProblematicReplicas( unavailableReplicas, se, 'PFNUnavailable') if zeroSizeReplicas: self.dic.reportProblematicReplicas(zeroSizeReplicas, se, 'PFNZeroSize') gLogger.info( 'Checking the integrity of physical files at %s complete' % se) return S_OK(pfnMetadata) ########################################################################## # # This section contains the specific methods for SE->File Catalog checks # def storageDirectoryToCatalog(self, lfnDir, storageElement): """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements """ gLogger.info("-" * 40) gLogger.info("Performing the SE->FC check at %s" % storageElement) gLogger.info("-" * 40) if isinstance(lfnDir, basestring): lfnDir = [lfnDir] res = 
self.getStorageDirectoryContents(lfnDir, storageElement) if not res['OK']: return res storageFileMetadata = res['Value'] if storageFileMetadata: return self.__checkCatalogForSEFiles(storageFileMetadata, storageElement) return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}}) def __checkCatalogForSEFiles(self, storageMetadata, storageElement): gLogger.info('Checking %s storage files exist in the catalog' % len(storageMetadata)) res = self.fileCatalog.getReplicas(storageMetadata) if not res['OK']: gLogger.error("Failed to get replicas for LFN", res['Message']) return res failedLfns = res['Value']['Failed'] successfulLfns = res['Value']['Successful'] notRegisteredLfns = [] for lfn in storageMetadata: if lfn in failedLfns: if 'No such file or directory' in failedLfns[lfn]: notRegisteredLfns.append( (lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered')) failedLfns.pop(lfn) elif storageElement not in successfulLfns[lfn]: notRegisteredLfns.append( (lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered')) if notRegisteredLfns: self.dic.reportProblematicReplicas( notRegisteredLfns, storageElement, 'LFNNotRegistered') if failedLfns: return S_ERROR(errno.ENOENT, 'Failed to obtain replicas') # For the LFNs found to be registered obtain the file metadata from the # catalog and verify against the storage metadata res = self._getCatalogMetadata(storageMetadata) if not res['OK']: return res catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res['Value'] sizeMismatch = [] for lfn, lfnCatalogMetadata in catalogMetadata.iteritems(): lfnStorageMetadata = storageMetadata[lfn] if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and (lfnStorageMetadata['Size'] != 0): sizeMismatch.append( (lfn, 'deprecatedUrl', storageElement, 'CatalogPFNSizeMismatch')) if sizeMismatch: self.dic.reportProblematicReplicas( sizeMismatch, storageElement, 'CatalogPFNSizeMismatch') gLogger.info('Checking storage files exist in the catalog complete') resDict = {'CatalogMetadata': 
catalogMetadata, 'StorageMetadata': storageMetadata} return S_OK(resDict) def getStorageDirectoryContents(self, lfnDir, storageElement): """ This takes the supplied lfn directories and recursively obtains the files in the supplied storage element """ gLogger.info('Obtaining the contents for %s directories at %s' % (len(lfnDir), storageElement)) se = StorageElement(storageElement) res = se.exists(lfnDir) if not res['OK']: gLogger.error( "Failed to obtain existance of directories", res['Message']) return res for directory, error in res['Value']['Failed'].iteritems(): gLogger.error('Failed to determine existance of directory', '%s %s' % (directory, error)) if res['Value']['Failed']: return S_ERROR(errno.ENOENT, 'Failed to determine existance of directory') directoryExists = res['Value']['Successful'] activeDirs = [] for directory in sorted(directoryExists): exists = directoryExists[directory] if exists: activeDirs.append(directory) allFiles = {} while len(activeDirs) > 0: currentDir = activeDirs[0] res = se.listDirectory(currentDir) activeDirs.remove(currentDir) if not res['OK']: gLogger.error('Failed to get directory contents', res['Message']) return res elif currentDir in res['Value']['Failed']: gLogger.error('Failed to get directory contents', '%s %s' % (currentDir, res['Value']['Failed'][currentDir])) return S_ERROR(errno.ENOENT, res['Value']['Failed'][currentDir]) else: dirContents = res['Value']['Successful'][currentDir] activeDirs.extend(se.getLFNFromURL(dirContents['SubDirs']).get( 'Value', {}).get('Successful', [])) fileURLMetadata = dirContents['Files'] fileMetadata = {} res = se.getLFNFromURL(fileURLMetadata) if not res['OK']: gLogger.error('Failed to get directory content LFNs', res['Message']) return res for url, error in res['Value']['Failed'].iteritems(): gLogger.error("Failed to get LFN for URL", "%s %s" % (url, error)) if res['Value']['Failed']: return S_ERROR(errno.ENOENT, "Failed to get LFNs for PFNs") urlLfns = res['Value']['Successful'] for 
urlLfn, lfn in urlLfns.iteritems(): fileMetadata[lfn] = fileURLMetadata[urlLfn] allFiles.update(fileMetadata) zeroSizeFiles = [] for lfn in sorted(allFiles): if os.path.basename(lfn) == 'dirac_directory': allFiles.pop(lfn) else: metadata = allFiles[lfn] if not metadata['Size']: zeroSizeFiles.append( (lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize')) if zeroSizeFiles: self.dic.reportProblematicReplicas( zeroSizeFiles, storageElement, 'PFNZeroSize') gLogger.info('Obtained at total of %s files for directories at %s' % (len(allFiles), storageElement)) return S_OK(allFiles) def _getCatalogDirectoryContents(self, lfnDirs): """ Obtain the contents of the supplied directory, recursively """ def _getDirectoryContent(directory): """ Inner function: recursively scan a directory, returns list of LFNs """ filesInDirectory = {} gLogger.debug("Examining %s" % directory) res = self.fileCatalog.listDirectory(directory) if not res['OK']: gLogger.error('Failed to get directory contents', res['Message']) return res if directory in res['Value']['Failed']: gLogger.error('Failed to get directory content', '%s %s' % (directory, res['Value']['Failed'][directory])) return S_ERROR('Failed to get directory content') if directory not in res['Value']['Successful']: return S_ERROR('Directory not existing?') # first, adding the files found in the current directory gLogger.debug("Files in %s: %d" % (directory, len( res['Value']['Successful'][directory]['Files']))) filesInDirectory.update(res['Value']['Successful'][directory]['Files']) # then, looking for subDirectories content if res['Value']['Successful'][directory]['SubDirs']: for l_dir in res['Value']['Successful'][directory]['SubDirs']: # recursion here subDirContent = _getDirectoryContent(l_dir) if not subDirContent['OK']: return subDirContent else: filesInDirectory.update(subDirContent['Value']) return S_OK(filesInDirectory) gLogger.info( 'Obtaining the catalog contents for %d directories' % len(lfnDirs)) allFiles = {} for lfnDir in 
lfnDirs: dirContent = _getDirectoryContent(lfnDir) if not dirContent['OK']: return dirContent else: gLogger.debug("Content of directory %s: %d files" % (lfnDir, len(dirContent['Value']))) allFiles.update(dirContent['Value']) gLogger.debug("Content of directories examined: %d files" % len(allFiles)) replicas = self.fileCatalog.getReplicas(list(allFiles)) if not replicas['OK']: return replicas if replicas['Value']['Failed']: return S_ERROR("Failures in replicas discovery") return S_OK({'Metadata': allFiles, 'Replicas': replicas['Value']['Successful']}) def _getCatalogReplicas(self, lfns): """ Obtain the file replicas from the catalog while checking that there are replicas """ gLogger.info('Obtaining the replicas for %s files' % len(lfns)) zeroReplicaFiles = [] res = self.fileCatalog.getReplicas(lfns, allStatus=True) if not res['OK']: gLogger.error('Failed to get catalog replicas', res['Message']) return res allReplicas = res['Value']['Successful'] for lfn, error in res['Value']['Failed'].iteritems(): if re.search('File has zero replicas', error): zeroReplicaFiles.append(lfn) gLogger.info('Obtaining the replicas for files complete') return S_OK((allReplicas, zeroReplicaFiles)) def _getCatalogMetadata(self, lfns): """ Obtain the file metadata from the catalog while checking they exist """ if not lfns: return S_OK({}) gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns)) missingCatalogFiles = [] zeroSizeFiles = [] res = self.fileCatalog.getFileMetadata(lfns) if not res['OK']: gLogger.error('Failed to get catalog metadata', res['Message']) return res allMetadata = res['Value']['Successful'] for lfn, error in res['Value']['Failed'].iteritems(): if re.search('No such file or directory', error): missingCatalogFiles.append(lfn) gLogger.info('Obtaining the catalog metadata complete') return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))
class Transformation(API):
    """Client-side description of a transformation.

    The parameters are kept in ``self.paramValues``; those listed in
    ``self.paramTypes`` are type-checked when set. Any ``getXxx()`` /
    ``setXxx(value)`` call that is not an explicitly defined method is routed
    by ``__getattr__`` to the generic getter / setter of parameter ``Xxx``.
    Once the transformation exists in the Transformation System, parameter
    changes are propagated to it.
    """

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """c'tor

        :param int transID: ID of an existing transformation to load, 0 for a
                            new transformation definition
        :param transClient: TransformationClient to use, a new one is created
                            if not given
        :raises AttributeError: when ``transID`` is given but does not exist
        """
        super(Transformation, self).__init__()

        self.paramTypes = {
            'TransformationID': [types.IntType, types.LongType],
            'TransformationName': types.StringTypes,
            'Status': types.StringTypes,
            'Description': types.StringTypes,
            'LongDescription': types.StringTypes,
            'Type': types.StringTypes,
            'Plugin': types.StringTypes,
            'AgentType': types.StringTypes,
            'FileMask': types.StringTypes,
            'TransformationGroup': types.StringTypes,
            'GroupSize': [types.IntType, types.LongType, types.FloatType],
            'InheritedFrom': [types.IntType, types.LongType],
            'Body': types.StringTypes,
            'MaxNumberOfTasks': [types.IntType, types.LongType],
            'EventsPerTask': [types.IntType, types.LongType]
        }
        self.paramValues = {
            'TransformationID': 0,
            'TransformationName': '',
            'Status': 'New',
            'Description': '',
            'LongDescription': '',
            'Type': '',
            'Plugin': 'Standard',
            'AgentType': 'Manual',
            'FileMask': '',
            'TransformationGroup': 'General',
            'GroupSize': 1,
            'InheritedFrom': 0,
            'Body': '',
            'MaxNumberOfTasks': 0,
            'EventsPerTask': 0
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            'Transformations/AllowedPlugins',
            ['Broadcast', 'Standard', 'BySize', 'ByShare'])
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues['TransformationID'] = transID
            res = self.getTransformation()
            if res['OK']:
                self.exists = True
            elif res['Message'] == 'Transformation does not exist':
                raise AttributeError('TransformationID %d does not exist' % transID)
            else:
                self.paramValues['TransformationID'] = 0
                gLogger.fatal(
                    "Failed to get transformation from database",
                    "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        """Set the URL of the transformation server, also in the client."""
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        """Return the URL of the transformation server."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-initialise this object, optionally loading transformation ``transID``."""
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Set the ``TargetSE`` parameter after checking the SEs are known."""
        return self.__setSE('TargetSE', seList)

    def setSourceSE(self, seList):
        """Set the ``SourceSE`` parameter after checking the SEs are known."""
        return self.__setSE('SourceSE', seList)

    def __setSE(self, se, seList):
        """Set the SE list parameter ``se``.

        :param str se: name of the parameter ('TargetSE' or 'SourceSE')
        :param seList: list of SE names, or a string holding either a python
                       list literal or comma/space separated SE names
        """
        if type(seList) in types.StringTypes:
            try:
                # NOTE(review): eval() executes arbitrary code; it is kept for
                # backward compatibility, do not feed it untrusted strings.
                seList = eval(seList)
            except Exception:
                # Not a python expression: comma or space separated names
                seList = seList.replace(',', ' ').split()
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        self.item_called = se
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Route unknown ``getXxx`` / ``setXxx`` attributes to the generic
        getter / setter, remembering ``Xxx`` in ``self.item_called``.

        :raises AttributeError: for any other unknown attribute
        """
        if name.startswith('get'):
            self.item_called = name[3:]
            return self.__getParam
        if name.startswith('set'):
            self.item_called = name[3:]
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Generic getter for the parameter named ``self.item_called``.

        ``'Available'`` gives the names of the typed parameters and
        ``'Parameters'`` the whole dictionary of values.

        :raises AttributeError: if the parameter is unknown
        """
        if self.item_called == 'Available':
            return S_OK(self.paramTypes.keys())
        if self.item_called == 'Parameters':
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" % self.item_called)

    def __setParam(self, value):
        """Generic setter for the parameter named ``self.item_called``.

        A changed value is sent to the Transformation System when the
        transformation already exists there, then stored locally.

        :raises TypeError: if a typed parameter gets a value of another type
        """
        change = False
        if self.item_called in self.paramTypes:
            oldValue = self.paramValues[self.item_called]
            if oldValue != value:
                if type(value) in self.paramTypes[self.item_called]:
                    change = True
                else:
                    raise TypeError("%s %s %s expected one of %s" % (
                        self.item_called, value, type(value),
                        self.paramTypes[self.item_called]))
        else:
            # Untyped (additional) parameter
            if self.item_called not in self.paramValues:
                change = True
            else:
                oldValue = self.paramValues[self.item_called]
                if oldValue != value:
                    change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" % self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues['TransformationID']
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(
                    transID, self.item_called, value)
                if not res['OK']:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Get the parameters of the transformation from the Transformation
        System and store them in this object through the setters.

        :return: ``S_OK(parameters dictionary)`` or an error structure
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setter = None
            setterName = "set%s" % paramName
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                setter = getattr(self, setterName)
            # Test the setter itself: the setter name is never empty
            if not setter:
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" % setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Get the logging records of the transformation.

        :return: ``S_OK(list of records)`` or an error structure
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res['Value']
        if printOutput:
            self._printFormattedDictList(
                loggingList, ['Message', 'MessageDate', 'AuthorDN'],
                'MessageDate', 'MessageDate')
        return S_OK(loggingList)

    def extendTransformation(self, nTasks, printOutput=False):
        """Call ``extendTransformation`` of the client for this transformation."""
        return self.__executeOperation('extendTransformation', nTasks, printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        """Call ``cleanTransformation`` of the client; on success the local
        status becomes ``'Cleaned'``."""
        res = self.__executeOperation('cleanTransformation', printOutput=printOutput)
        if res['OK']:
            self.paramValues['Status'] = 'Cleaned'
        return res

    def deleteTransformation(self, printOutput=False):
        """Call ``deleteTransformation`` of the client; on success this object
        is reset."""
        res = self.__executeOperation('deleteTransformation', printOutput=printOutput)
        if res['OK']:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Call ``addFilesToTransformation`` of the client for this transformation."""
        return self.__executeOperation('addFilesToTransformation', lfns, printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Call ``setFileStatusForTransformation`` of the client for this transformation."""
        return self.__executeOperation('setFileStatusForTransformation', status, lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        """Call ``getTransformationTaskStats`` of the client for this transformation."""
        return self.__executeOperation('getTransformationTaskStats', printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        """Call ``getTransformationStats`` of the client for this transformation."""
        return self.__executeOperation('getTransformationStats', printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Call ``deleteTasks`` of the client for this transformation."""
        return self.__executeOperation('deleteTasks', taskMin, taskMax, printOutput=printOutput)

    def addTaskForTransformation(self, lfns=[], se='Unknown', printOutput=False):
        """Call ``addTaskForTransformation`` of the client for this transformation."""
        return self.__executeOperation('addTaskForTransformation', lfns, se,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Call ``setTaskStatus`` of the client for this transformation."""
        return self.__executeOperation('setTaskStatus', taskID, status, printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Call method ``operation`` of the transformation client with the
        transformation ID followed by ``parms`` and ``kwds``.

        :param bool printOutput: keyword only, pretty-print the result
        :return: the result of the client call, or ``S_ERROR`` when no
                 transformation ID is known or the client has no such method
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        printOutput = kwds.pop('printOutput', False)
        fcn = None
        if hasattr(self.transClient, operation) and callable(getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR(
                "Unable to invoke %s, it isn't a member funtion of TransformationClient" % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self, fileStatus=[], lfns=[],
                               outputFields=['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE',
                                             'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate'],
                               orderBy='FileID', printOutput=False):
        """Get the files of this transformation, optionally selected by status
        and/or LFN.

        ``outputFields`` and ``orderBy`` only drive the printout; with an empty
        ``outputFields`` the available fields are printed instead.

        :return: the result of the client query
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields, 'FileID', orderBy)
        return res

    def getTransformationTasks(self, taskStatus=[], taskIDs=[],
                               outputFields=['TransformationID', 'TaskID', 'ExternalStatus',
                                             'ExternalID', 'TargetSE', 'CreationTime',
                                             'LastUpdateTime'],
                               orderBy='TaskID', printOutput=False):
        """Get the tasks of this transformation, optionally selected by
        external status and/or task ID.

        ``outputFields`` and ``orderBy`` only drive the printout; with an empty
        ``outputFields`` the available fields are printed instead.

        :return: the result of the client query
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields, 'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self, transID=[], transStatus=[],
                           outputFields=['TransformationID', 'Status', 'AgentType',
                                         'TransformationName', 'CreationDate'],
                           orderBy='TransformationID', printOutput=False):
        """Get transformations, optionally selected by ID and/or status.

        ``outputFields`` and ``orderBy`` only drive the printout; with an empty
        ``outputFields`` the available fields are printed instead.

        :return: the result of the client query
        """
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields, 'TransformationID', orderBy)
        return res

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create the transformation in the Transformation System from the
        current parameters, then upload the additional (untyped) parameters.

        :return: ``S_OK(transformation ID)`` or an error structure
        """
        res = self._checkCreation()
        if not res['OK']:
            return self._errorReport(res, 'Failed transformation sanity check')
        if printOutput:
            gLogger.info("Will attempt to create transformation with the following parameters")
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues['TransformationName'],
            self.paramValues['Description'],
            self.paramValues['LongDescription'],
            self.paramValues['Type'],
            self.paramValues['Plugin'],
            self.paramValues['AgentType'],
            self.paramValues['FileMask'],
            transformationGroup=self.paramValues['TransformationGroup'],
            groupSize=self.paramValues['GroupSize'],
            inheritedFrom=self.paramValues['InheritedFrom'],
            body=self.paramValues['Body'],
            maxTasks=self.paramValues['MaxNumberOfTasks'],
            eventsPerTask=self.paramValues['EventsPerTask'],
            addFiles=addFiles)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res['Value']
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        # The typed parameters were passed at creation, upload the other ones
        for paramName, paramValue in self.paramValues.items():
            if paramName not in self.paramTypes:
                res = self.transClient.setTransformationParameter(transID, paramName, paramValue)
                if not res['OK']:
                    gLogger.error("Failed to add parameter", "%s %s" % (paramName, res['Message']))
                    gLogger.notice("To add this parameter later please execute the following.")
                    gLogger.notice("oTransformation = Transformation(%d)" % transID)
                    gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Sanity check before creating the transformation.

        Missing required parameters and an unsupported plugin are asked
        interactively. Fails if a transformation ID is already set.
        """
        if self.paramValues['TransformationID']:
            gLogger.info("You are currently working with an active transformation definition.")
            gLogger.info("If you wish to create a new transformation reset the TransformationID.")
            gLogger.info("oTransformation.reset()")
            return S_ERROR()

        requiredParameters = ['TransformationName', 'Description', 'LongDescription', 'Type']
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info("%s is not defined for this transformation. This is required..." % parameter)
                self.paramValues[parameter] = raw_input("Please enter the value of " + parameter + " ")

        plugin = self.paramValues['Plugin']
        if plugin not in self.supportedPlugins:
            gLogger.info("The selected Plugin (%s) is not known to the transformation agent." % plugin)
            res = self.__promptForParameter('Plugin', choices=self.supportedPlugins, default='Standard')
            if not res['OK']:
                return res
            self.paramValues['Plugin'] = res['Value']

        # NOTE: the plugin specific checks (_check<Plugin>Plugin) are not invoked here
        return S_OK()

    def _checkBySizePlugin(self):
        """Check for the BySize plugin: same as for the Standard plugin."""
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        """Check for the ByShare plugin: same as for the Standard plugin."""
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        """Check for the Standard plugin: a GroupSize <= 0 is reset to 1."""
        groupSize = self.paramValues['GroupSize']
        if groupSize <= 0:
            gLogger.info("The GroupSize was found to be less than zero. It has been set to 1.")
            res = self.setGroupSize(1)
            if not res['OK']:
                return res
        return S_OK()

    def _checkBroadcastPlugin(self):
        """Check for the Broadcast plugin: SourceSE and TargetSE are asked
        interactively when not set."""
        gLogger.info("The Broadcast plugin requires the following parameters be set: %s" %
                     (', '.join(['SourceSE', 'TargetSE'])))
        requiredParams = ['SourceSE', 'TargetSE']
        for requiredParam in requiredParams:
            if (requiredParam not in self.paramValues) or (not self.paramValues[requiredParam]):
                paramValue = raw_input("Please enter " + requiredParam + " ")
                setter = None
                setterName = "set%s" % requiredParam
                if hasattr(self, setterName) and callable(getattr(self, setterName)):
                    setter = getattr(self, setterName)
                if not setter:
                    return S_ERROR(
                        "Unable to invoke %s, this function hasn't been implemented." % setterName)
                ses = paramValue.replace(',', ' ').split()
                res = setter(ses)
                if not res['OK']:
                    return res
        return S_OK()

    def __checkSEs(self, seList):
        """Check that all SEs of ``seList`` are defined in the configuration
        section /Resources/StorageElements."""
        res = gConfig.getSections('/Resources/StorageElements')
        if not res['OK']:
            return self._errorReport(res, 'Failed to get possible StorageElements')
        missing = []
        for se in seList:
            if se not in res['Value']:
                gLogger.error("StorageElement %s is not known" % se)
                missing.append(se)
        if missing:
            return S_ERROR("%d StorageElements not known" % len(missing))
        return S_OK()

    def __promptForParameter(self, parameter, choices=[], default='', insert=True):
        """Ask the user for the value of ``parameter``.

        :param list choices: allowed values, passed to ``promptUser``
        :param str default: default value, passed to ``promptUser``
        :param bool insert: also set the parameter through its setter
        :return: ``S_OK(value)`` or an error structure
        """
        res = promptUser("Please enter %s" % parameter, choices=choices, default=default)
        if not res['OK']:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res['Value']))
        paramValue = res['Value']
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!" % setterName)
            res = setter(paramValue)
            if not res['OK']:
                return res
        return S_OK(paramValue)
class FileStatusTransformationAgent(AgentModule):
    """Agent that re-evaluates the status of transformation files.

    For each selected transformation, and for each configured file status,
    the agent checks whether the files are available on the source and
    target storage elements (file catalog plus physical check), decides on
    an action per file (set Processed, set Deleted, or retry), applies the
    actions and mails a summary of actions and errors.
    """

    def __init__(self, *args, **kwargs):
        """Set default options and create the service clients."""
        AgentModule.__init__(self, *args, **kwargs)
        self.name = 'FileStatusTransformationAgent'
        self.enabled = False
        self.shifterProxy = 'DataManager'
        self.transformationTypes = ["Replication"]
        self.transformationStatuses = ["Active"]
        self.transformationFileStatuses = ["Assigned", "Problematic", "Processed", "Unused"]

        self.addressTo = ["*****@*****.**"]
        self.addressFrom = "*****@*****.**"
        self.emailSubject = "FileStatusTransformationAgent"

        # action / new status -> list of per-file accounting dictionaries
        self.accounting = defaultdict(list)
        # error messages collected for the next notification mail
        self.errors = []

        self.fcClient = FileCatalogClient()
        self.tClient = TransformationClient()
        self.reqClient = ReqClient()
        self.nClient = NotificationClient()

    def checkFileStatusFuncExists(self, status):
        """Tell whether a ``check_<status>_files`` method exists for *status*.

        :param str status: transformation file status
        :returns: True if a callable ``check_<status>_files`` attribute
                  exists on this agent, False (after logging a warning)
                  otherwise
        """
        checkFileStatusFuncName = "check_%s_files" % (status.lower())
        if not (hasattr(self, checkFileStatusFuncName) and callable(getattr(self, checkFileStatusFuncName))):
            self.log.warn("Unable to process transformation files with status ", status)
            return False

        return True

    def beginExecution(self):
        """Reload the configuration before every cycle.

        :returns: S_OK
        """
        self.enabled = self.am_getOption('EnableFlag', False)
        # NOTE(review): this stores the return value of am_setOption, not the
        # option value itself - confirm that this is what is intended.
        self.shifterProxy = self.am_setOption('shifterProxy', 'DataManager')
        self.transformationTypes = self.am_getOption('TransformationTypes', ["Replication"])
        self.transformationStatuses = self.am_getOption('TransformationStatuses', ["Active"])
        self.transformationFileStatuses = self.am_getOption(
            'TransformationFileStatuses', ["Assigned", "Problematic", "Processed", "Unused"])

        self.addressTo = self.am_getOption('MailTo', ["*****@*****.**"])
        self.addressFrom = self.am_getOption('MailFrom', "*****@*****.**")

        # Keep a real list: the statuses are iterated once per transformation,
        # so a one-shot iterator (what filter() yields on Python 3) would be
        # exhausted after the first transformation.
        self.transformationFileStatuses = [status for status in self.transformationFileStatuses
                                           if self.checkFileStatusFuncExists(status)]
        self.accounting.clear()

        return S_OK()

    def sendNotification(self, transID, transType=None, sourceSEs=None, targetSEs=None):
        """Mail the collected accounting information and errors of a transformation.

        Nothing is sent when there are neither errors nor accounting entries.
        After the mails have been attempted, the error list and the
        accounting dictionary are emptied.

        :param transID: transformation ID, used in the subject and body
        :param transType: transformation type, added to the body when set
        :param sourceSEs: iterable of source SEs, added to the body when set
        :param targetSEs: iterable of target SEs, added to the body when set
        :returns: S_OK
        """
        if not (self.errors or self.accounting):
            return S_OK()

        emailBody = "Transformation ID: %s\n" % transID
        if transType:
            emailBody += "Transformation Type: %s\n" % transType

        if sourceSEs:
            emailBody += "Source SE: %s\n" % (" ".join(str(source) for source in sourceSEs))

        if targetSEs:
            emailBody += "Target SE: %s\n\n" % (" ".join(str(target) for target in targetSEs))

        rows = []
        for action, transFiles in self.accounting.items():
            emailBody += "Total number of files with action %s: %s\n" % (action, len(transFiles))
            for transFile in transFiles:
                rows.append([[transFile['LFN']],
                             [str(transFile['AvailableOnSource'])],
                             [str(transFile['AvailableOnTarget'])],
                             [transFile['Status']],
                             [action]])

        if rows:
            columns = ["LFN", "Source", "Target", "Old Status", "Action"]
            emailBody += printTable(columns, rows, printOut=False, numbering=False, columnSeparator=' | ')

        if self.errors:
            # Line break after the header so that the first error does not
            # end up glued to "Errors:".
            emailBody += "\n\nErrors:\n"
            emailBody += "\n".join(self.errors)

        self.log.notice(emailBody)
        subject = "%s: %s" % (self.emailSubject, transID)
        for address in self.addressTo:
            res = self.nClient.sendMail(address, subject, emailBody, self.addressFrom, localAttempt=False)
            if not res['OK']:
                # Best effort: a failed mail to one address does not stop the others.
                self.log.error("Failure to send Email notification to ", address)
                continue

        self.errors = []
        self.accounting.clear()

        return S_OK()

    def logError(self, errStr, varMsg=''):
        """Log an error and remember it for the next notification mail.

        :param str errStr: fixed part of the message
        :param str varMsg: variable part of the message
        """
        self.log.error(errStr, varMsg)
        self.errors.append(errStr + varMsg)

    def execute(self):
        """Main execution loop of the agent.

        Transformations lacking SourceSE, TargetSE or DataTransType are
        reported by mail and skipped; all others are processed.

        :returns: S_OK, or S_ERROR when the transformations cannot be fetched
        """
        res = self.getTransformations()
        if not res['OK']:
            self.log.error('Failure to get transformations', res['Message'])
            return S_ERROR("Failure to get transformations")

        transformations = res['Value']
        if not transformations:
            self.log.notice('No transformations found with Status %s and Type %s ' %
                            (self.transformationStatuses, self.transformationTypes))
            return S_OK()

        self.log.notice('Will treat %d transformations' % len(transformations))
        self.log.notice('Transformations: %s' %
                        ",".join([str(transformation['TransformationID']) for transformation in transformations]))

        for trans in transformations:
            transID = trans['TransformationID']
            if 'SourceSE' not in trans or not trans['SourceSE']:
                self.logError("SourceSE not set for transformation, skip processing, transID: ", "%s" % transID)
                self.sendNotification(transID)
                continue

            if 'TargetSE' not in trans or not trans['TargetSE']:
                self.logError("TargetSE not set for transformation, skip processing, transID: ", "%s" % transID)
                self.sendNotification(transID, sourceSEs=trans['SourceSE'])
                continue

            if 'DataTransType' not in trans:
                self.logError("Transformation Type not set for transformation, skip processing, transID: ",
                              "%s" % transID)
                self.sendNotification(transID, sourceSEs=trans['SourceSE'], targetSEs=trans['TargetSE'])
                continue

            res = self.processTransformation(transID, trans['SourceSE'], trans['TargetSE'], trans['DataTransType'])
            if not res['OK']:
                self.log.error('Failure to process transformation with ID:', transID)
                continue

        return S_OK()

    def getTransformations(self, transID=None):
        """Return the transformations matching the configured types and statuses.

        Each returned transformation dictionary is extended with 'SourceSE',
        'TargetSE' and 'DataTransType' when these could be determined; a
        failure for one transformation is logged and leaves its dictionary
        without the missing keys.

        :param transID: restrict the selection to this transformation ID
        :returns: S_OK(list of transformation dicts) or the failed result
        """
        condDict = {'Status': self.transformationStatuses, 'Type': self.transformationTypes}
        if transID:
            condDict['TransformationID'] = transID
        res = self.tClient.getTransformations(condDict=condDict)

        if not res['OK']:
            return res

        result = res['Value']
        for trans in result:
            res = self.tClient.getTransformationParameters(trans['TransformationID'], ['SourceSE', 'TargetSE'])
            if not res['OK']:
                self.log.error('Failure to get SourceSE and TargetSE parameters for Transformation ID:',
                               trans['TransformationID'])
                continue

            # NOTE(review): eval() runs on values read from the transformation
            # database; if these parameters can be set by untrusted users this
            # executes arbitrary code. Consider ast.literal_eval once it is
            # confirmed that the stored values are always plain literals.
            trans['SourceSE'] = eval(res['Value']['SourceSE'])
            trans['TargetSE'] = eval(res['Value']['TargetSE'])

            res = self.getDataTransformationType(trans['TransformationID'])
            if not res['OK']:
                self.log.error('Failure to determine Data Transformation Type',
                               "%s: %s" % (trans['TransformationID'], res['Message']))
                continue

            trans['DataTransType'] = res['Value']

        return S_OK(result)

    def getRequestStatus(self, transID, taskIDs):
        """Return the request status and request ID for the given task IDs.

        :param transID: transformation ID
        :param taskIDs: task ID or list of task IDs used as selection
        :returns: S_OK(dict taskID -> {'RequestStatus', 'RequestID'}) or the
                  failed result
        """
        res = self.tClient.getTransformationTasks(condDict={'TransformationID': transID, 'TaskID': taskIDs})
        if not res['OK']:
            self.log.error('Failure to get Transformation Tasks for Transformation ID:', transID)
            return res

        result = res['Value']
        requestStatus = {}
        for task in result:
            # int() instead of long(): it promotes to long automatically on
            # Python 2 and also exists on Python 3.
            requestStatus[task['TaskID']] = {'RequestStatus': task['ExternalStatus'],
                                             'RequestID': int(task['ExternalID'])}

        return S_OK(requestStatus)

    def getDataTransformationType(self, transID):
        """Classify a transformation as replication or moving from its Body.

        An empty body is treated as a replication. The body is parsed as
        JSON; if that fails, the raw string is searched for the operation
        names instead.

        :param transID: transformation ID
        :returns: S_OK(REPLICATION_TRANS | MOVING_TRANS), S_ERROR for an
                  unknown type, or the failed result of the parameter query
        """
        res = self.tClient.getTransformationParameters(transID, 'Body')
        if not res['OK']:
            return res

        # if body is empty then we assume that it is a replication transformation
        if not res['Value']:
            return S_OK(REPLICATION_TRANS)

        replication = False
        rmReplica = False
        try:
            body = json.loads(res['Value'])
            for operation in body:
                if 'ReplicateAndRegister' in operation:
                    replication = True
                if 'RemoveReplica' in operation:
                    rmReplica = True
        except ValueError:
            # Not JSON: fall back to a substring search on the raw body.
            if 'ReplicateAndRegister' in res['Value']:
                replication = True
                if 'RemoveReplica' in res['Value']:
                    rmReplica = True

        if rmReplica and replication:
            return S_OK(MOVING_TRANS)

        if replication:
            return S_OK(REPLICATION_TRANS)

        return S_ERROR("Unknown Transformation Type '%r'" % res['Value'])

    def setFileStatus(self, transID, transFiles, status):
        """Set *status* for the given transformation files and account for it.

        The status is only written when the agent is enabled; the accounting
        entries are added in both cases.

        :param transID: transformation ID
        :param list transFiles: transformation file dictionaries
        :param str status: new file status
        :returns: S_OK, or the failed result of the status update
        """
        lfnStatuses = {transFile['LFN']: status for transFile in transFiles}

        if lfnStatuses:
            if self.enabled:
                res = self.tClient.setFileStatusForTransformation(transID, newLFNsStatus=lfnStatuses, force=True)
                if not res['OK']:
                    self.logError('Failed to set statuses for LFNs ', "%s" % res['Message'])
                    return res

            for transFile in transFiles:
                self.accounting[status].append({'LFN': transFile['LFN'],
                                                'Status': transFile['Status'],
                                                'AvailableOnSource': transFile['AvailableOnSource'],
                                                'AvailableOnTarget': transFile['AvailableOnTarget']})
        return S_OK()

    def selectFailedRequests(self, transFile):
        """Tell whether the request of a transformation file has failed.

        :param dict transFile: transformation file dictionary
        :returns: True if the request status of the file's task is 'Failed',
                  False otherwise or when the status cannot be fetched
        """
        res = self.getRequestStatus(transFile['TransformationID'], transFile['TaskID'])
        if not res['OK']:
            self.log.error('Failure to get Request Status for Assigned File')
            return False
        result = res['Value']

        return result[transFile['TaskID']]['RequestStatus'] == 'Failed'

    def retryStrategyForFiles(self, transID, transFiles):
        """Determine the retry strategy for the tasks of the given files.

        :param transID: transformation ID
        :param list transFiles: transformation file dictionaries
        :returns: S_OK(dict taskID -> {'Strategy': ..., ['RequestID': ...]})
                  or the failed result of the request status query
        """
        taskIDs = [transFile['TaskID'] for transFile in transFiles]
        res = self.getRequestStatus(transID, taskIDs)
        if not res['OK']:
            return res
        result = res['Value']
        retryStrategy = defaultdict(dict)
        for taskID in taskIDs:
            if taskID is None:
                self.log.error("Task ID is None", "Transformation: %s\n Files: %r " % (transID, transFiles))
                retryStrategy[None]['Strategy'] = SET_UNUSED
                continue
            res = self.reqClient.getRequest(requestID=result[taskID]['RequestID'])
            if not res['OK']:
                self.log.notice('Request %s does not exist setting file status to unused' %
                                result[taskID]['RequestID'])
                retryStrategy[taskID]['Strategy'] = SET_UNUSED
            else:
                # Resetting the request is currently switched off: the
                # strategy is SET_UNUSED even when the request exists.
                retryStrategy[taskID]['Strategy'] = SET_UNUSED  # RESET_REQUEST
                retryStrategy[taskID]['RequestID'] = result[taskID]['RequestID']

        return S_OK(retryStrategy)

    def check_assigned_files(self, actions, transFiles, transType):
        """Sort files with Assigned status into *actions* by availability.

        :param dict actions: action -> list of files, appended to in place
        :param list transFiles: files carrying 'AvailableOnSource' and
                                'AvailableOnTarget'
        :param transType: REPLICATION_TRANS or MOVING_TRANS
        """
        for transFile in transFiles:
            if transFile['AvailableOnSource'] and transFile['AvailableOnTarget']:
                if transType == REPLICATION_TRANS:
                    actions[SET_PROCESSED].append(transFile)
                if transType == MOVING_TRANS:
                    actions[RETRY].append(transFile)

            elif transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']:
                actions[RETRY].append(transFile)

            elif not transFile['AvailableOnSource'] and transFile['AvailableOnTarget']:
                actions[SET_PROCESSED].append(transFile)

            else:
                # not on src and target
                actions[SET_DELETED].append(transFile)

    def check_unused_files(self, actions, transFiles, transType):
        """Sort files with Unused status into *actions* by availability.

        Files still available on the source are left untouched.

        :param dict actions: action -> list of files, appended to in place
        :param list transFiles: files carrying the availability flags
        :param transType: not used by this check
        """
        for transFile in transFiles:
            if not transFile['AvailableOnSource'] and transFile['AvailableOnTarget']:
                actions[SET_PROCESSED].append(transFile)

            if not transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']:
                actions[SET_DELETED].append(transFile)

    def check_processed_files(self, actions, transFiles, transType):
        """Sort files with Processed status into *actions* by availability.

        :param dict actions: action -> list of files, appended to in place
        :param list transFiles: files carrying the availability flags
        :param transType: REPLICATION_TRANS or MOVING_TRANS
        """
        for transFile in transFiles:
            if transFile['AvailableOnSource'] and transFile['AvailableOnTarget'] and transType == MOVING_TRANS:
                actions[RETRY].append(transFile)

            if transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']:
                actions[RETRY].append(transFile)

            if not transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']:
                actions[SET_DELETED].append(transFile)

    def check_problematic_files(self, actions, transFiles, transType):
        """Sort files with Problematic status into *actions* by availability.

        :param dict actions: action -> list of files, appended to in place
        :param list transFiles: files carrying the availability flags
        :param transType: REPLICATION_TRANS or MOVING_TRANS
        """
        for transFile in transFiles:
            if transFile['AvailableOnSource'] and transFile['AvailableOnTarget']:
                if transType == REPLICATION_TRANS:
                    actions[SET_PROCESSED].append(transFile)
                if transType == MOVING_TRANS:
                    actions[RETRY].append(transFile)

            elif transFile['AvailableOnSource'] and not transFile['AvailableOnTarget']:
                actions[RETRY].append(transFile)

            elif not transFile['AvailableOnSource'] and transFile['AvailableOnTarget']:
                actions[SET_PROCESSED].append(transFile)

            else:
                # not available on source and target
                actions[SET_DELETED].append(transFile)

    def retryFiles(self, transID, transFiles):
        """Reset the request of each file or set the file to Unused.

        Which of the two happens depends on the retry strategy determined by
        :meth:`retryStrategyForFiles`. Requests are only reset when the agent
        is enabled.

        :param transID: transformation ID
        :param list transFiles: transformation file dictionaries
        :returns: S_OK, or the failed result of the strategy lookup
        """
        setFilesUnused = []
        setFilesAssigned = []
        res = self.retryStrategyForFiles(transID, transFiles)
        if not res['OK']:
            self.logError('Failure to determine retry strategy (unused / reset request) for files ',
                          "%s" % res['Message'])
            return res

        retryStrategy = res['Value']
        for transFile in transFiles:
            if retryStrategy[transFile['TaskID']]['Strategy'] != RESET_REQUEST:
                setFilesUnused.append(transFile)
                continue

            requestID = retryStrategy[transFile['TaskID']]['RequestID']
            if self.enabled:
                res = self.reqClient.resetFailedRequest(requestID, allR=True)
                if not res['OK']:
                    self.logError('Failed to reset request ', 'ReqID: %s Error: %s' % (requestID, res['Message']))
                    continue

                if res['Value'] == "Not reset":
                    self.logError('Failed to reset request ', 'ReqID: %s is non-recoverable' % requestID)
                    continue

                setFilesAssigned.append(transFile)
                res = self.tClient.setTaskStatus(transID, transFile['TaskID'], 'Waiting')
                if not res['OK']:
                    self.logError('Failure to set Waiting status for Task ID: ',
                                  "%s %s" % (transFile['TaskID'], res['Message']))
                    continue

            self.accounting[RESET_REQUEST].append({'LFN': transFile['LFN'],
                                                   'Status': transFile['Status'],
                                                   'AvailableOnSource': transFile['AvailableOnSource'],
                                                   'AvailableOnTarget': transFile['AvailableOnTarget']})

        if setFilesUnused:
            self.setFileStatus(transID, setFilesUnused, 'Unused')

        if setFilesAssigned:
            self.setFileStatus(transID, setFilesAssigned, 'Assigned')

        return S_OK()

    def applyActions(self, transID, actions):
        """Apply the collected actions: set new file statuses or retry.

        :param transID: transformation ID
        :param dict actions: action -> list of transformation files
        """
        for action, transFiles in actions.items():
            if action == SET_PROCESSED and transFiles:
                self.setFileStatus(transID, transFiles, 'Processed')

            if action == SET_DELETED and transFiles:
                self.setFileStatus(transID, transFiles, 'Deleted')

            if action == RETRY and transFiles:
                # if there is a request in RMS then reset request otherwise set file status unused
                self.retryFiles(transID, transFiles)

    def existsInFC(self, storageElements, lfns):
        """Check whether files have replicas registered at all given SEs.

        :param storageElements: iterable of storage element names
        :param lfns: LFNs to look up
        :returns: S_OK({'Successful': {lfn: bool}, 'Failed': {lfn: message}})
                  or the failed result of the catalog query. Files reported
                  as 'No such file or directory' count as Successful/False.
        """
        res = self.fcClient.getReplicas(lfns)
        if not res['OK']:
            return res

        result = {'Successful': {}, 'Failed': {}}
        setOfSEs = set(storageElements)

        for lfn, msg in res['Value']['Failed'].items():
            if msg == 'No such file or directory':
                result['Successful'][lfn] = False
            else:
                result['Failed'][lfn] = msg

        # check if all replicas are registered in FC
        filesFoundInFC = res['Value']['Successful']
        for lfn, replicas in filesFoundInFC.items():
            result['Successful'][lfn] = setOfSEs.issubset(replicas.keys())

        return S_OK(result)

    def existsOnSE(self, storageElements, lfns):
        """Check whether files physically exist on every given storage element.

        :param storageElements: iterable of storage element names
        :param list lfns: LFNs to check; the VO name is taken from the first
                          path component of the first LFN
        :returns: S_OK({'Successful': {lfn: bool}, 'Failed': {se: failures}})
                  or the failed result of a storage element query. A file is
                  True only if no queried SE reported it as missing.
        """
        result = {'Failed': {}, 'Successful': {}}

        if not lfns:
            return S_OK(result)

        voName = lfns[0].split('/')[1]
        for se in storageElements:
            res = StorageElement(se, vo=voName).exists(lfns)
            if not res['OK']:
                return res
            for lfn, status in res['Value']['Successful'].items():
                if lfn not in result['Successful']:
                    result['Successful'][lfn] = status

                # Missing on any single SE makes the overall answer False.
                if not status:
                    result['Successful'][lfn] = False

            result['Failed'][se] = res['Value']['Failed']

        return S_OK(result)

    def exists(self, storageElements, lfns):
        """Check whether files exist in the file catalog and on storage.

        Files registered in the catalog at all SEs are additionally verified
        on the storage elements; those missing there are downgraded to False.

        :param storageElements: iterable of storage element names
        :param lfns: LFNs to check
        :returns: the (possibly amended) result of :meth:`existsInFC`, or an
                  S_ERROR structure when a catalog or storage query failed
        """
        fcRes = self.existsInFC(storageElements, lfns)
        if not fcRes['OK']:
            self.logError('Failure to determine if files exists in File Catalog ', "%s" % fcRes['Message'])
            return fcRes

        if fcRes['Value']['Failed']:
            self.logError("Failed FileCatalog Response ", "%s" % fcRes['Value']['Failed'])

        # check if files found in file catalog also exist on SE
        checkLFNsOnStorage = [lfn for lfn in fcRes['Value']['Successful'] if fcRes['Value']['Successful'][lfn]]

        # no files were found in FC, return the result instead of verifying them on SE
        if not checkLFNsOnStorage:
            return fcRes

        seRes = self.existsOnSE(storageElements, checkLFNsOnStorage)
        if not seRes['OK']:
            self.logError('Failure to determine if files exist on SE ', "%s" % seRes['Message'])
            return seRes

        for se in storageElements:
            if seRes['Value']['Failed'][se]:
                self.logError('Failed to determine if files exist on SE ',
                              "%s %s" % (se, seRes['Value']['Failed'][se]))
                # Give the error a message so that callers logging
                # res['Message'] do not print an empty string.
                return S_ERROR("Failed to determine if files exist on SE %s" % se)

        fcResult = fcRes['Value']['Successful']
        seResult = seRes['Value']['Successful']
        for lfn in fcResult:
            if fcResult[lfn] and not seResult[lfn]:
                fcRes['Value']['Successful'][lfn] = False

        return fcRes

    def processTransformation(self, transID, sourceSE, targetSEs, transType):
        """Process all files of one transformation.

        For every configured file status the files are fetched, their
        availability on source and target is determined, and the matching
        ``check_<status>_files`` method fills the action lists. The actions
        are applied once all statuses were handled, then the notification is
        sent. Failures for one status are logged and that status is skipped.

        :param transID: transformation ID
        :param sourceSE: source storage elements
        :param targetSEs: target storage elements
        :param transType: REPLICATION_TRANS or MOVING_TRANS
        :returns: S_OK
        """
        actions = {SET_PROCESSED: [], RETRY: [], SET_DELETED: []}

        for status in self.transformationFileStatuses:
            res = self.tClient.getTransformationFiles(condDict={'TransformationID': transID, 'Status': status})
            if not res['OK']:
                errStr = 'Failure to get Transformation Files, Status: %s Transformation ID: %s Message: %s' % (
                    status, transID, res['Message'])
                self.logError(errStr)
                continue

            transFiles = res['Value']
            if not transFiles:
                self.log.notice("No Transformation Files found with status %s for Transformation ID %d" %
                                (status, transID))
                continue

            self.log.notice("Processing Transformation Files with status %s for TransformationID %d " %
                            (status, transID))

            if status == 'Assigned':
                # A list is required: transFiles is iterated several times below.
                transFiles = [transFile for transFile in transFiles if self.selectFailedRequests(transFile)]

            lfns = [transFile['LFN'] for transFile in transFiles]

            if not lfns:
                continue

            res = self.exists(sourceSE, lfns)
            if not res['OK']:
                continue

            resultSourceSe = res['Value']['Successful']

            res = self.exists(targetSEs, lfns)
            if not res['OK']:
                continue
            resultTargetSEs = res['Value']['Successful']

            for transFile in transFiles:
                lfn = transFile['LFN']
                transFile['AvailableOnSource'] = resultSourceSe[lfn]
                transFile['AvailableOnTarget'] = resultTargetSEs[lfn]

            checkFilesFuncName = "check_%s_files" % status.lower()
            checkFiles = getattr(self, checkFilesFuncName)
            checkFiles(actions, transFiles, transType)

        self.applyActions(transID, actions)
        self.sendNotification(transID, transType, sourceSE, targetSEs)

        return S_OK()
class Transformation(API):
    """Client-side object for defining, creating and querying a transformation.

    Parameters are kept in ``self.paramValues``; ``getXxx`` / ``setXxx``
    accessors for arbitrary parameter names are synthesised by
    :meth:`__getattr__`.
    """

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """c'tor

        :param transID: ID of an existing transformation to load; 0 starts
                        with an empty definition
        :param transClient: client to use; a TransformationClient is created
                            when none is given
        :raises AttributeError: if *transID* is given and the lookup reports
                                'Transformation does not exist'
        """
        super(Transformation, self).__init__()

        # Accepted value types per standard parameter (checked in __setParam).
        self.paramTypes = {'TransformationID': [types.IntType, types.LongType],
                           'TransformationName': types.StringTypes,
                           'Status': types.StringTypes,
                           'Description': types.StringTypes,
                           'LongDescription': types.StringTypes,
                           'Type': types.StringTypes,
                           'Plugin': types.StringTypes,
                           'AgentType': types.StringTypes,
                           'FileMask': types.StringTypes,
                           'TransformationGroup': types.StringTypes,
                           'GroupSize': [types.IntType, types.LongType, types.FloatType],
                           'InheritedFrom': [types.IntType, types.LongType],
                           'Body': types.StringTypes,
                           'MaxNumberOfTasks': [types.IntType, types.LongType],
                           'EventsPerTask': [types.IntType, types.LongType]}
        self.paramValues = {'TransformationID': 0,
                            'TransformationName': '',
                            'Status': 'New',
                            'Description': '',
                            'LongDescription': '',
                            'Type': '',
                            'Plugin': 'Standard',
                            'AgentType': 'Manual',
                            'FileMask': '',
                            'TransformationGroup': 'General',
                            'GroupSize': 1,
                            'InheritedFrom': 0,
                            'Body': '',
                            'MaxNumberOfTasks': 0,
                            'EventsPerTask': 0}
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue('Transformations/AllowedPlugins',
                                                  ['Broadcast', 'Standard', 'BySize', 'ByShare'])
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues['TransformationID'] = transID
            res = self.getTransformation()
            if res['OK']:
                self.exists = True
            elif res['Message'] == 'Transformation does not exist':
                raise AttributeError('TransformationID %d does not exist' % transID)
            else:
                self.paramValues['TransformationID'] = 0
                gLogger.fatal("Failed to get transformation from database",
                              "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        """Remember *server* and point the transformation client at it."""
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        """Return the server URL currently stored on this object."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-initialise the object, keeping the server of the new client in sync.

        :param transID: transformation ID passed on to ``__init__``
        :returns: S_OK
        """
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Set the 'TargetSE' parameter after validating the SE names."""
        return self.__setSE('TargetSE', seList)

    def setSourceSE(self, seList):
        """Set the 'SourceSE' parameter after validating the SE names."""
        return self.__setSE('SourceSE', seList)

    def setBody(self, body):
        """ check that the body is a string, or using the proper syntax for multiple operations

        :param body: transformation body, for example

          .. code :: python

            body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                     ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
                   ]

        :type body: string or list of tuples (or lists) of string and dictionaries
        :raises TypeError: If the structure is not as expected
        :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation`
                            is used
        :returns: S_OK, S_ERROR
        """
        self.item_called = "Body"
        if isinstance(body, basestring):
            return self.__setParam(body)
        if not isinstance(body, (list, tuple)):
            raise TypeError("Expected list or string, but %r is %s" % (body, type(body)))

        for tup in body:
            if not isinstance(tup, (tuple, list)):
                raise TypeError("Expected tuple or list, but %r is %s" % (tup, type(tup)))
            if len(tup) != 2:
                raise TypeError("Expected 2-tuple, but %r is length %d" % (tup, len(tup)))
            if not isinstance(tup[0], basestring):
                raise TypeError("Expected string, but first entry in tuple %r is %s" % (tup, type(tup[0])))
            if not isinstance(tup[1], dict):
                # Report the type of the offending (second) entry.
                raise TypeError("Expected dictionary, but second entry in tuple %r is %s" % (tup, type(tup[1])))
            for par, val in tup[1].items():
                if not isinstance(par, basestring):
                    raise TypeError("Expected string, but key in dictionary %r is %s" % (par, type(par)))
                if par not in Operation.ATTRIBUTE_NAMES:
                    raise ValueError("Unknown attribute for Operation: %s" % par)
                if not isinstance(val, (basestring, int, long, float, list, tuple, dict)):
                    raise TypeError("Cannot encode %r, in json" % (val))
        return self.__setParam(json.dumps(body))

    def __setSE(self, seParam, seList):
        """Normalise *seList* to a list, validate it and store it as *seParam*.

        :param str seParam: parameter name ('TargetSE' or 'SourceSE')
        :param seList: string (python literal or comma separated), list,
                       dict or tuple of SE names
        :returns: S_OK, or S_ERROR for a bad type or failed SE check
        """
        if isinstance(seList, basestring):
            # NOTE(review): eval() on a caller supplied string executes
            # arbitrary expressions; consider ast.literal_eval if only
            # literals are expected here.
            try:
                seList = eval(seList)
            except Exception:
                # Not an evaluable expression: treat as comma separated names.
                seList = seList.split(',')
        elif isinstance(seList, (list, dict, tuple)):
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Provide ``getXxx`` / ``setXxx`` accessors for parameter ``Xxx``.

        The parameter name is stored in ``self.item_called`` and the generic
        getter or setter is returned.

        :raises AttributeError: for names not starting with 'get' or 'set'
        """
        if name.startswith('get'):
            self.item_called = name[3:]
            return self.__getParam
        if name.startswith('set'):
            self.item_called = name[3:]
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Return the value selected by the preceding ``getXxx`` lookup.

        'Available' yields the standard parameter names, 'Parameters' the
        whole value dictionary.

        :raises AttributeError: for an unknown parameter
        """
        if self.item_called == 'Available':
            return S_OK(self.paramTypes.keys())
        if self.item_called == 'Parameters':
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" % self.item_called)

    def __setParam(self, value):
        """Set the parameter selected by the preceding ``setXxx`` lookup.

        A changed value is also written to the server when the
        transformation already exists there.

        :raises TypeError: if a standard parameter gets a value of a type
                           not listed in ``self.paramTypes``
        :returns: S_OK, or the failed result of the server update
        """
        change = False
        if self.item_called in self.paramTypes:
            if self.paramValues[self.item_called] != value:
                if type(value) in self.paramTypes[self.item_called]:
                    change = True
                else:
                    raise TypeError("%s %s %s expected one of %s" % (self.item_called, value, type(value),
                                                                     self.paramTypes[self.item_called]))
        else:
            if self.item_called not in self.paramValues:
                change = True
            else:
                if self.paramValues[self.item_called] != value:
                    change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" % self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues['TransformationID']
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(transID, self.item_called, value)
                if not res['OK']:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Fetch the transformation from the server and load its parameters.

        :param bool printOutput: pretty-print a failed result
        :returns: S_OK(parameter dict), S_ERROR when no ID is known, or the
                  failed result of the query
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            # The attribute lookup also primes self.item_called for
            # dynamically generated setters.
            setter = getattr(self, setterName, None)
            if not callable(setter):
                gLogger.error("Unable to invoke setter %s, it isn't a member function" % setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Return the logging records of the transformation.

        :param bool printOutput: print the records (or the failed result)
        :returns: S_OK(list of records), S_ERROR when no ID is known, or the
                  failed result of the query
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        res = self.transClient.getTransformationLogging(transID)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res['Value']
        if printOutput:
            self._printFormattedDictList(loggingList, ['Message', 'MessageDate', 'AuthorDN'],
                                         'MessageDate', 'MessageDate')
        return S_OK(loggingList)

    def extendTransformation(self, nTasks, printOutput=False):
        """Forward ``extendTransformation`` to the client for this transformation."""
        return self.__executeOperation('extendTransformation', nTasks, printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        """Forward ``cleanTransformation``; on success mark the status 'Cleaned'."""
        res = self.__executeOperation('cleanTransformation', printOutput=printOutput)
        if res['OK']:
            self.paramValues['Status'] = 'Cleaned'
        return res

    def deleteTransformation(self, printOutput=False):
        """Forward ``deleteTransformation``; on success reset this object."""
        res = self.__executeOperation('deleteTransformation', printOutput=printOutput)
        if res['OK']:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Forward ``addFilesToTransformation`` to the client."""
        return self.__executeOperation('addFilesToTransformation', lfns, printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Forward ``setFileStatusForTransformation`` to the client."""
        return self.__executeOperation('setFileStatusForTransformation', status, lfns, printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        """Forward ``getTransformationTaskStats`` to the client."""
        return self.__executeOperation('getTransformationTaskStats', printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        """Forward ``getTransformationStats`` to the client."""
        return self.__executeOperation('getTransformationStats', printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Forward ``deleteTasks`` to the client."""
        return self.__executeOperation('deleteTasks', taskMin, taskMax, printOutput=printOutput)

    def addTaskForTransformation(self, lfns=[], se='Unknown', printOutput=False):
        """Forward ``addTaskForTransformation`` to the client."""
        return self.__executeOperation('addTaskForTransformation', lfns, se, printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Forward ``setTaskStatus`` to the client."""
        return self.__executeOperation('setTaskStatus', taskID, status, printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Call ``self.transClient.<operation>(transID, *parms, **kwds)``.

        :param str operation: name of the client method
        :param printOutput: keyword only; pretty-print the result when true
                            (defaults to False when not supplied)
        :returns: the result of the client call, or S_ERROR when no
                  TransformationID is known or the method does not exist
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        # Default avoids a KeyError when a caller omits printOutput.
        printOutput = kwds.pop('printOutput', False)
        fcn = None
        if hasattr(self.transClient, operation) and callable(getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR("Unable to invoke %s, it isn't a member funtion of TransformationClient" % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self, fileStatus=[], lfns=[],
                               outputFields=['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE',
                                             'ErrorCount', 'InsertedTime', 'LastUpdate'],
                               orderBy='FileID', printOutput=False):
        """Query the files of this transformation.

        :param fileStatus: restrict to these file statuses
        :param lfns: restrict to these LFNs
        :param outputFields: fields to print; empty prints the available ones
        :param orderBy: field used to order the printed table
        :param bool printOutput: print the result
        :returns: the result of the client query
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields, 'FileID', orderBy)
        return res

    def getTransformationTasks(self, taskStatus=[], taskIDs=[],
                               outputFields=['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID',
                                             'TargetSE', 'CreationTime', 'LastUpdateTime'],
                               orderBy='TaskID', printOutput=False):
        """Query the tasks of this transformation.

        :param taskStatus: restrict to these external statuses
        :param taskIDs: restrict to these task IDs
        :param outputFields: fields to print; empty prints the available ones
        :param orderBy: field used to order the printed table
        :param bool printOutput: print the result
        :returns: the result of the client query
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields, 'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self, transID=[], transStatus=[],
                           outputFields=['TransformationID', 'Status', 'AgentType', 'TransformationName',
                                         'CreationDate'],
                           orderBy='TransformationID', printOutput=False):
        """Query transformations by ID and/or status.

        :param transID: restrict to these transformation IDs
        :param transStatus: restrict to these statuses
        :param outputFields: fields to print; empty prints the available ones
        :param orderBy: field used to order the printed table
        :param bool printOutput: print the result
        :returns: the result of the client query
        """
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields, 'TransformationID', orderBy)
        return res
#############################################################################
def getAuthorDNfromProxy( self ):
  """ Get the AuthorDN and username of the transformation from the uploaded proxy

      :return: S_OK( {'username': <username>, 'authorDN': <identity>} ) built from
               the 'username' and 'identity' entries returned by getProxyInfo(),
               or S_ERROR( message ) when the proxy information cannot be retrieved
  """
  res = getProxyInfo()
  if not res['OK']:
    gLogger.error( "Unable to get uploaded proxy Info %s " % res['Message'] )
    return S_ERROR( res['Message'] )
  author = res['Value']['identity']
  username = res['Value']['username']
  return S_OK( {'username' : username, 'authorDN' : author } )

#############################################################################
def getTransformationsByUser( self, authorDN = "", userName = "", transID = [], transStatus = [],
                              outputFields = ['TransformationID', 'Status', 'AgentType',
                                              'TransformationName', 'CreationDate', 'AuthorDN'],
                              orderBy = 'TransformationID', printOutput = False ):
  """ List the transformations created by a given user

      :param authorDN: DN of the author; when empty it is taken from the uploaded proxy
      :param userName: user name, only used when authorDN is empty: it must then be
                       empty or equal to the user name found in the proxy
      :param transID: optional selection on TransformationID
      :param transStatus: optional selection on Status
      :param outputFields: fields printed when printOutput is True
      :param orderBy: field used to order the printed output
      :param printOutput: print the result (or the error) when True
      :return: the result of transClient.getTransformations(), or S_ERROR when the
               author cannot be resolved

      The list defaults are only read, never modified, by this method.
  """
  condDict = {}
  if authorDN == "":
    res = self.getAuthorDNfromProxy()
    if not res['OK']:
      gLogger.error( res['Message'] )
      return S_ERROR( res['Message'] )
    foundUserName = res['Value']['username']
    foundAuthor = res['Value']['authorDN']
    # If the user who created the uploaded proxy is not the requested user, report the error and exit
    if userName not in ( "", foundUserName ):
      message = "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % ( userName, foundUserName )
      gLogger.error( message )
      return S_ERROR( message )
    userName = foundUserName
    authorDN = foundAuthor
    gLogger.info( "Will list transformations created by user '%s' with status '%s'" % ( userName, ', '.join( transStatus ) ) )
  else:
    gLogger.info( "Will list transformations created by '%s' with status '%s'" % ( authorDN, ', '.join( transStatus ) ) )

  condDict['AuthorDN'] = authorDN
  if transID:
    condDict['TransformationID'] = transID
  if transStatus:
    condDict['Status'] = transStatus
  res = self.transClient.getTransformations( condDict = condDict )
  if not res['OK']:
    if printOutput:
      self._prettyPrint( res )
    return res

  if printOutput:
    if not outputFields:
      # str.join is a method of the separator, not of the list being joined
      gLogger.info( "Available fields are: %s" % ' '.join( res.get( 'ParameterNames', [] ) ) )
    elif not res['Value']:
      gLogger.info( "No tasks found for selection" )
    else:
      self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
  return res

#############################################################################
def getSummaryTransformations( self , transID = [] ):
  """Show the summary for a list of Transformations

     Fields starting with 'F' ('J')  refers to files (jobs).
     Proc. stand for processed.

     :param transID: list of transformation IDs to summarise (only read, never modified)
     :return: S_OK( list of dictionaries, one per record, keyed by the short column
              headers ), the failed service result, or S_ERROR when no record
              could be extracted
  """
  condDict = { 'TransformationID' : transID }
  orderby = []
  start = 0
  maxitems = len( transID )
  paramShowNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed',
                    'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting',
                    'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled']
  # Below, the header used for each field in the printing: short to fit in one line
  paramShowNamesShort = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.',
                         'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled']
  dictList = []

  result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems )
  if not result['OK']:
    self._prettyPrint( result )
    return result

  if result['Value']['TotalRecords'] > 0:
    try:
      paramNames = result['Value']['ParameterNames']
      # Column positions are the same for every record: resolve them once
      columns = [paramNames.index( pname ) for pname in paramShowNames]
      for paramValues in result['Value']['Records']:
        paramShowValues = [paramValues[column] for column in columns]
        dictList.append( dict( zip( paramShowNamesShort, paramShowValues ) ) )
    except Exception as x:
      # Best effort: a malformed answer is reported and handled below as "nothing found"
      print( 'Exception %s ' % str( x ) )

  if not dictList:
    gLogger.error( 'No found transformations satisfying input condition' )
    return S_ERROR( 'No found transformations satisfying input condition' )

  # Kept from the original: the value returned by the printing helper is printed as well
  print( self._printFormattedDictList( dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0] ) )

  return S_OK( dictList )

#############################################################################
def addTransformation( self, addFiles = True, printOutput = False ):
  """ Create the transformation described by self.paramValues

      After the creation, every parameter of self.paramValues that is not listed in
      self.paramTypes is stored with setTransformationParameter(); a failure there
      is only logged.

      :param addFiles: forwarded to transClient.addTransformation()
      :param printOutput: print the parameters (and a failed result) when True
      :return: S_OK( transID ), or the error of the sanity check / of the creation
  """
  res = self._checkCreation()
  if not res['OK']:
    return self._errorReport( res, 'Failed transformation sanity check' )
  if printOutput:
    gLogger.info( "Will attempt to create transformation with the following parameters" )
    self._prettyPrint( self.paramValues )

  res = self.transClient.addTransformation( self.paramValues['TransformationName'],
                                            self.paramValues['Description'],
                                            self.paramValues['LongDescription'],
                                            self.paramValues['Type'],
                                            self.paramValues['Plugin'],
                                            self.paramValues['AgentType'],
                                            self.paramValues['FileMask'],
                                            transformationGroup = self.paramValues['TransformationGroup'],
                                            groupSize = self.paramValues['GroupSize'],
                                            inheritedFrom = self.paramValues['InheritedFrom'],
                                            body = self.paramValues['Body'],
                                            maxTasks = self.paramValues['MaxNumberOfTasks'],
                                            eventsPerTask = self.paramValues['EventsPerTask'],
                                            addFiles = addFiles )
  if not res['OK']:
    if printOutput:
      self._prettyPrint( res )
    return res
  transID = res['Value']
  self.exists = True
  self.setTransformationID( transID )
  gLogger.notice( "Created transformation %d" % transID )
  for paramName, paramValue in self.paramValues.items():
    if paramName in self.paramTypes:
      continue
    res = self.transClient.setTransformationParameter( transID, paramName, paramValue )
    if not res['OK']:
      gLogger.error( "Failed to add parameter", "%s %s" % ( paramName, res['Message'] ) )
      gLogger.notice( "To add this parameter later please execute the following." )
      gLogger.notice( "oTransformation = Transformation(%d)" % transID )
      gLogger.notice( "oTransformation.set%s(...)" % paramName )
  return S_OK( transID )

def _checkCreation( self ):
  """ Few checks before the creation of a transformation

      Refuses to continue when a TransformationID is already set, interactively
      asks for the missing required parameters and, if the plugin is not one of
      self.supportedPlugins, prompts for a supported one.

      :return: S_OK(), S_ERROR() or the error of the plugin prompt
  """
  if self.paramValues['TransformationID']:
    gLogger.info( "You are currently working with an active transformation definition." )
    gLogger.info( "If you wish to create a new transformation reset the TransformationID." )
    gLogger.info( "oTransformation.reset()" )
    return S_ERROR()

  requiredParameters = ['TransformationName', 'Description' , 'LongDescription', 'Type']
  for parameter in requiredParameters:
    if not self.paramValues[parameter]:
      gLogger.info( "%s is not defined for this transformation. This is required..." % parameter )
      self.paramValues[parameter] = raw_input( "Please enter the value of " + parameter + " " )

  plugin = self.paramValues['Plugin']
  if plugin and plugin not in self.supportedPlugins:
    gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin )
    res = self.__promptForParameter( 'Plugin', choices = self.supportedPlugins, default = 'Standard' )
    if not res['OK']:
      return res
    self.paramValues['Plugin'] = res['Value']

  return S_OK()

def _checkBySizePlugin( self ):
  """ The BySize plugin has the same requirements as the Standard one """
  return self._checkStandardPlugin()

def _checkBySharePlugin( self ):
  """ The ByShare plugin has the same requirements as the Standard one """
  return self._checkStandardPlugin()

def _checkStandardPlugin( self ):
  """ Make sure that GroupSize is positive: a value <= 0 is replaced by 1

      :return: S_OK() or the error returned by setGroupSize()
  """
  groupSize = self.paramValues['GroupSize']
  if groupSize <= 0:
    gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." )
    res = self.setGroupSize( 1 )
    if not res['OK']:
      return res
  return S_OK()

def _checkBroadcastPlugin( self ):
  """ Interactively ask for the SourceSE and TargetSE parameters that are not yet set

      The answer may be a comma and/or blank separated list; it is passed as a
      list to the corresponding set<Param> method.

      :return: S_OK(), S_ERROR if the setter is not available, or the setter error
  """
  requiredParams = ['SourceSE', 'TargetSE']
  gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % ( ', '.join( requiredParams ) ) )
  for requiredParam in requiredParams:
    if self.paramValues.get( requiredParam ):
      continue
    paramValue = raw_input( "Please enter " + requiredParam + " " )
    setterName = "set%s" % requiredParam
    setter = getattr( self, setterName, None )
    if not callable( setter ):
      return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." % setterName )
    ses = paramValue.replace( ',', ' ' ).split()
    res = setter( ses )
    if not res['OK']:
      return res
  return S_OK()

def __checkSEs( self, seList ):
  """ Check that every element of seList is a section of /Resources/StorageElements

      :param seList: iterable of storage element names
      :return: S_OK(), or S_ERROR giving the number of unknown storage elements
  """
  res = gConfig.getSections( '/Resources/StorageElements' )
  if not res['OK']:
    return self._errorReport( res, 'Failed to get possible StorageElements' )
  missing = set( seList ) - set( res['Value'] )
  if missing:
    for se in missing:
      gLogger.error( "StorageElement %s is not known" % se )
    return S_ERROR( "%d StorageElements not known" % len( missing ) )
  return S_OK()

def __promptForParameter( self, parameter, choices = [], default = '', insert = True ):
  """ Ask the user for the value of a parameter

      :param parameter: name of the parameter
      :param choices: allowed answers, forwarded to promptUser() (only read)
      :param default: default answer, forwarded to promptUser()
      :param insert: when True the answer is also stored through set<parameter>
      :return: S_OK( answer ), or the error of the prompt / of the setter
  """
  res = promptUser( "Please enter %s" % parameter, choices = choices, default = default )
  if not res['OK']:
    return self._errorReport( res )
  paramValue = res['Value']
  gLogger.notice( "%s will be set to '%s'" % ( parameter, paramValue ) )
  if insert:
    setterName = "set%s" % parameter
    setter = getattr( self, setterName, None )
    if not callable( setter ):
      # The name of the setter was never substituted in this message
      return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" % setterName )
    res = setter( paramValue )
    if not res['OK']:
      return res
  return S_OK( paramValue )
class TransformationCLI(CLI, API):
    """Interactive console for the transformation system.

    Every ``do_<name>`` method is listed by ``helpall`` as the command
    ``<name>``, and its docstring is the help text that gets printed, so
    those docstrings are user-facing. The commands talk to the service
    through ``self.server``, a TransformationClient.
    """

    def __init__(self):
        self.server = TransformationClient()
        self.indentSpace = 4
        CLI.__init__(self)
        API.__init__(self)

    def printPair(self, key, value, separator=":"):
        """Print ``key`` and a possibly multi-line ``value`` as an aligned pair.

        The first line of ``value`` is printed after the key (padded to
        ``self.indentSpace`` columns) and the separator. The following lines
        are printed indented, except the last one, which is skipped
        (presumably the whitespace before a docstring's closing quotes).
        """
        valueList = value.split("\n")
        padding = " " * (self.indentSpace - len(key))
        print("%s%s%s %s" % (key, padding, separator, valueList[0].strip()))
        for valueLine in valueList[1:-1]:
            print("%s %s" % (" " * self.indentSpace, valueLine.strip()))

    @staticmethod
    def _helpText(command):
        """Return the docstring of ``command`` without leading whitespace ('' if it has none)."""
        return (command.__doc__ or "").lstrip()

    def do_help(self, args):
        """ Default version of the help command

           Usage: help <command>
           OR use helpall to see description for all commands
        """
        CLI.do_help(self, args)

    # overriding the default help command
    def do_helpall(self, args):
        """
        Shows help information

            Usage: helpall <command>
            If no command is specified all commands are shown
        """
        words = args.split()
        if not words:
            print("\nAvailable commands:\n")
            for attribute in sorted(dir(self)):
                if attribute.startswith("do_"):
                    self.printPair(attribute[3:], self._helpText(getattr(self, attribute)))
                    print("")
            return
        command = words[0]
        try:
            obj = getattr(self, "do_%s" % command)
        except AttributeError:
            print("There's no such %s command" % command)
            return
        self.printPair(command, self._helpText(obj))

    def do_shell(self, args):
        """Execute a shell command

           usage !<shell_command>
        """
        res = shellCall(0, args)
        if not res['OK']:
            print(res['Message'])
            return
        # A command that ran but exited non-zero is still an OK result and
        # carries no 'Message': show whatever it wrote.
        _returnCode, stdOut, stdErr = res['Value']
        print("%s\n%s" % (stdOut, stdErr))

    def check_params(self, args, num):
        """Checks if the number of parameters correct

        Returns ``(argument list, count)``, or ``(False, count)`` after
        printing an error when fewer than ``num`` arguments were given.
        """
        argss = args.split()
        length = len(argss)
        if length < num:
            print("Error: Number of arguments provided %d less that required %d, please correct." % (length, num))
            return (False, length)
        return (argss, length)

    def check_id_or_name(self, id_or_name):
        """resolve name or Id by converting type of argument """
        if id_or_name.isdigit():
            return long(id_or_name)  # it looks like an id
        return id_or_name

    ####################################################################
    #
    # These are the methods for transformation manipulation
    #

    def do_getall(self, args):
        """Get transformation details

           usage: getall [Status] [Status]
        """
        oTrans = Transformation()
        oTrans.getTransformations(transStatus=args.split(), printOutput=True)

    def do_getAllByUser(self, args):
        """Get all transformations created by a given user

    The first argument is the authorDN or username. The authorDN
    is preferred: it need to be inside quotes because contains
    white spaces. Only authorDN should be quoted.

    When the username is provided instead,
    the authorDN is retrieved from the uploaded proxy,
    so that the retrieved transformations are those created by
    the user who uploaded that proxy: that user could be different
    that the username provided to the function.

       usage: getAllByUser authorDN or username [Status] [Status]
        """
        quotes = ("'", '"')
        notQuoted = "AuthorDN need to be quoted (just quote that argument)"
        oTrans = Transformation()
        argss = args.split()
        if not argss:
            print(self.do_getAllByUser.__doc__)
            return

        first = argss[0]
        username = ""
        author = ""
        if first[0] in quotes:  # authorDN given
            author = first
            status_idx = 1
            # Collect further words only while the closing quote is missing,
            # so that a one-word quoted DN does not swallow the statuses.
            if len(first) < 2 or first[-1] not in quotes:
                for arg in argss[1:]:
                    author += ' ' + arg
                    status_idx += 1
                    if arg[-1] in quotes:
                        break
            # At this point we should have something like 'author'
            if len(author) < 2 or author[-1] not in quotes:
                print(notQuoted)
                return
            author = author[1:-1]  # throw away the quotes
            # the rest are the requested status
            status = argss[status_idx:]
        elif '=' in first:
            # looks like a DN that the user did not quote
            print(notQuoted)
            return
        else:  # username given
            username = first
            status = argss[1:]

        oTrans.getTransformationsByUser(authorDN=author, userName=username,
                                        transStatus=status, printOutput=True)

    def do_summaryTransformations(self, args):
        """Show the summary for a list of Transformations

        Fields starting with 'F' ('J')  refers to files (jobs).
        Proc. stand for processed.

            Usage:  summaryTransformations <ProdID> [<ProdID> ...]
        """
        argss = args.split()
        if not argss:
            print(self.do_summaryTransformations.__doc__)
            return
        oTrans = Transformation()
        oTrans.getSummaryTransformations(transID=argss)

    def do_getStatus(self, args):
        """Get transformation details

           usage: getStatus <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.getTransformation(transName)
            if not res['OK']:
                print("Getting status of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s: %s" % (transName, res['Value']['Status']))

    def do_setStatus(self, args):
        """Set transformation status

           usage: setStatus  <Status> <transName|ID>
           Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and status not supplied")
            return
        status = argss[0]
        for transName in argss[1:]:
            res = self.server.setTransformationParameter(transName, 'Status', status)
            if not res['OK']:
                print("Setting status of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s set to %s" % (transName, status))

    def do_start(self, args):
        """Start transformation

           usage: start <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'Status', 'Active')
            if not res['OK']:
                print("Setting Status of %s failed: %s" % (transName, res['Message']))
                continue
            res = self.server.setTransformationParameter(transName, 'AgentType', 'Automatic')
            if not res['OK']:
                print("Setting AgentType of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s started" % transName)

    def do_stop(self, args):
        """Stop transformation

           usage: stop <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'AgentType', 'Manual')
            if not res['OK']:
                print("Stopping of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s stopped" % transName)

    def do_flush(self, args):
        """Flush transformation

           usage: flush <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'Status', 'Flush')
            if not res['OK']:
                print("Flushing of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s flushing" % transName)

    def do_get(self, args):
        """Get transformation definition

           usage: get <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
        else:
            # the body has its own command (getBody); tolerate its absence
            res['Value'].pop('Body', None)
            printDict(res['Value'])

    def do_getBody(self, args):
        """Get transformation body

           usage: getBody <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
        else:
            print(res['Value']['Body'])

    def do_getFileStat(self, args):
        """Get transformation file statistics

           usage: getFileStat <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformationStats(transName)
        if not res['OK']:
            print("Failed to get statistics for %s: %s" % (transName, res['Message']))
        else:
            res['Value'].pop('Total', None)
            printDict(res['Value'])

    def do_modMask(self, args):
        """Modify transformation input definition

           usage: modMask <mask> <transName|ID>
        """
        argss = args.split()
        # both the mask and at least one transformation are needed
        if len(argss) < 2:
            print("mask and transformation not supplied")
            return
        mask = argss[0]
        for transName in argss[1:]:
            res = self.server.setTransformationParameter(transName, "FileMask", mask)
            if not res['OK']:
                print("Failed to modify input file mask for %s: %s" % (transName, res['Message']))
            else:
                print("Updated %s filemask" % transName)

    def do_getFiles(self, args):
        """Get files for the transformation (optionally with a given status)

           usage: getFiles <transName|ID> [Status] [Status]
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        status = argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return
        selectDict = {'TransformationID': res['Value']['TransformationID']}
        if status:
            selectDict['Status'] = status
        res = self.server.getTransformationFiles(condDict=selectDict)
        if not res['OK']:
            print("Failed to get transformation files: %s" % res['Message'])
        elif res['Value']:
            self._printFormattedDictList(
                res['Value'],
                ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                'LFN', 'LFN')
        else:
            print("No files found")

    def do_getFileStatus(self, args):
        """Get file(s) status for the given transformation

           usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return
        selectDict = {'TransformationID': res['Value']['TransformationID']}
        res = self.server.getTransformationFiles(condDict=selectDict)
        if not res['OK']:
            print("Failed to get transformation files: %s" % res['Message'])
        elif res['Value']:
            wanted = set(lfns)
            filesList = [fileDict for fileDict in res['Value'] if fileDict['LFN'] in wanted]
            if filesList:
                self._printFormattedDictList(
                    filesList,
                    ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                    'LFN', 'LFN')
            else:
                print("Could not find any LFN in %s for transformation %s" % (lfns, transName))
        else:
            print("No files found")

    def do_getOutputFiles(self, args):
        """Get output files for the transformation

           usage: getOutputFiles <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return
        fc = FileCatalog()
        meta = {'ProdID': transName}
        res = fc.findFilesByMetadata(meta)
        if not res['OK']:
            print(res['Message'])
            return
        if not res['Value']:
            # %s rather than %d: the argument may be a transformation name
            print('No output files yet for transformation %s' % transName)
            return
        for lfn in res['Value']:
            print(lfn)

    def do_getInputDataQuery(self, args):
        """Get input data query for the transformation

           usage: getInputDataQuery <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformationInputDataQuery(transName)
        if not res['OK']:
            print("Failed to get transformation input data query: %s" % res['Message'])
        else:
            print(res['Value'])

    def do_setFileStatus(self, args):
        """Set file status for the given transformation

           usage: setFileStatus <transName|ID> <lfn> <status>
        """
        argss = args.split()
        if len(argss) != 3:
            print("transformation file and status not supplied")
            return
        transName, lfn, status = argss
        res = self.server.setFileStatusForTransformation(transName, status, [lfn])
        if not res['OK']:
            print("Failed to update file status: %s" % res['Message'])
        else:
            print("Updated file status to %s" % status)

    def _resetFileStatus(self, args, **kwargs):
        """Set the given files of a transformation back to 'Unused'.

        ``args`` is the raw command line (``<transName|ID> <lfn> [<lfn>...]``);
        ``kwargs`` are forwarded to ``setFileStatusForTransformation``.
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file(s) not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.setFileStatusForTransformation(transName, 'Unused', lfns, **kwargs)
        if not res['OK']:
            print("Failed to reset file status: %s" % res['Message'])
        elif 'Failed' in res['Value'] and res['Value']['Failed']:
            print("Could not reset some files: ")
            for lfn, reason in res['Value']['Failed'].items():
                print("%s %s" % (lfn, reason))
        else:
            print("Updated file statuses to 'Unused' for %d file(s)" % len(lfns))

    def do_resetFile(self, args):
        """Reset file status for the given transformation

           usage: resetFile <transName|ID> <lfns>
        """
        self._resetFileStatus(args)

    def do_resetProcessedFile(self, args):
        """ Reset file status for the given transformation

            usage: resetProcessedFile <transName|ID> <lfn>
        """
        self._resetFileStatus(args, force=True)

    ####################################################################
    #
    # These are the methods for file manipulation
    #

    def do_addDirectory(self, args):
        """Add files from the given catalog directory

           usage: addDirectory <directory> [directory]
        """
        argss = args.split()
        if not argss:
            print("no directory supplied")
            return
        for directory in argss:
            res = self.server.addDirectory(directory, force=True)
            if not res['OK']:
                print('failed to add directory %s: %s' % (directory, res['Message']))
            else:
                print('added %s files for %s' % (res['Value'], directory))

    def do_replicas(self, args):
        """ Get replicas for <path>

            usage: replicas <lfn> [lfn]
        """
        argss = args.split()
        if not argss:
            print("no files supplied")
            return
        res = self.server.getReplicas(argss)
        if not res['OK']:
            print("failed to get any replica information: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to get replica information for %s: %s" % (lfn, failed[lfn]))
        successful = res['Value']['Successful']
        for lfn in sorted(successful):
            outStr = "%s :" % lfn.ljust(100)
            for se in sorted(successful[lfn]):
                outStr = "%s %s" % (outStr, se.ljust(15))
            print(outStr)

    def do_addFile(self, args):
        """Add new files to transformation DB

           usage: addFile <lfn> [lfn]
        """
        argss = args.split()
        if not argss:
            print("no files supplied")
            return
        lfnDict = {}
        for lfn in argss:
            lfnDict[lfn] = {
                'PFN': 'IGNORED-PFN',
                'SE': 'IGNORED-SE',
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        res = self.server.addFile(lfnDict, force=True)
        if not res['OK']:
            print("failed to add any files: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to add %s: %s" % (lfn, failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("added %s" % lfn)

    def do_removeFile(self, args):
        """Remove file from transformation DB

           usage: removeFile <lfn> [lfn]
        """
        argss = args.split()
        if not argss:
            print("no files supplied")
            return
        res = self.server.removeFile(argss)
        if not res['OK']:
            print("failed to remove any files: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to remove %s: %s" % (lfn, failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("removed %s" % lfn)

    def do_addReplica(self, args):
        """ Add new replica to the transformation DB

            usage: addReplica <lfn> <se>
        """
        argss = args.split()
        if len(argss) != 2:
            print("no file info supplied")
            return
        lfn, se = argss
        lfnDict = {
            lfn: {
                'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        }
        res = self.server.addReplica(lfnDict, force=True)
        if not res['OK']:
            print("failed to add replica: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to add replica: %s" % (failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("added %s" % lfn)

    def do_removeReplica(self, args):
        """Remove replica from the transformation DB

           usage: removeReplica <lfn> <se>
        """
        argss = args.split()
        if len(argss) != 2:
            print("no file info supplied")
            return
        lfn, se = argss
        lfnDict = {
            lfn: {
                'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        }
        res = self.server.removeReplica(lfnDict)
        if not res['OK']:
            print("failed to remove replica: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to remove replica: %s" % (failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("removed %s" % lfn)

    def do_setReplicaStatus(self, args):
        """Set replica status, usually used to mark a replica Problematic

           usage: setReplicaStatus <lfn> <status> <se>
        """
        argss = args.split()
        if len(argss) < 3:
            print("no file info supplied")
            return
        lfn, status, se = argss[:3]
        lfnDict = {
            lfn: {
                'Status': status,
                'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        }
        res = self.server.setReplicaStatus(lfnDict)
        if not res['OK']:
            print("failed to set replica status: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to set replica status: %s" % (failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("updated replica status %s" % lfn)
transClient = TransformationClient() for transID in idList: lfns = lfnsExplicit if not lfns: res = transClient.getTransformation(transID) if not res['OK']: print "Failed to get transformation information: %s" % res[ 'Message'] DIRAC.exit(2) selectDict = { 'TransformationID': res['Value']['TransformationID'], 'Status': status } res = transClient.getTransformationFiles(condDict=selectDict) if not res['OK']: print "Failed to get files: %s" % res['Message'] DIRAC.exit(2) lfns = [d['LFN'] for d in res['Value']] if not lfns: print "No files found in transformation %s, status %s" % ( transID, status) if not lfns: print "No files to be set in transformation", transID else: resetFiles = 0 failed = {} for lfnChunk in breakListIntoChunks(lfns, 10000):
class Transformation( API ): ############################################################################# def __init__( self, transID = 0, transClient = None ): """ c'tor """ super( Transformation, self ).__init__() self.paramTypes = { 'TransformationID' : [types.IntType, types.LongType], 'TransformationName' : types.StringTypes, 'Status' : types.StringTypes, 'Description' : types.StringTypes, 'LongDescription' : types.StringTypes, 'Type' : types.StringTypes, 'Plugin' : types.StringTypes, 'AgentType' : types.StringTypes, 'FileMask' : types.StringTypes, 'TransformationGroup' : types.StringTypes, 'GroupSize' : [types.IntType, types.LongType, types.FloatType], 'InheritedFrom' : [types.IntType, types.LongType], 'Body' : types.StringTypes, 'MaxNumberOfTasks' : [types.IntType, types.LongType], 'EventsPerTask' : [types.IntType, types.LongType]} self.paramValues = { 'TransformationID' : 0, 'TransformationName' : '', 'Status' : 'New', 'Description' : '', 'LongDescription' : '', 'Type' : '', 'Plugin' : 'Standard', 'AgentType' : 'Manual', 'FileMask' : '', 'TransformationGroup' : 'General', 'GroupSize' : 1, 'InheritedFrom' : 0, 'Body' : '', 'MaxNumberOfTasks' : 0, 'EventsPerTask' : 0} self.ops = Operations() self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins', ['Broadcast', 'Standard', 'BySize', 'ByShare'] ) if not transClient: self.transClient = TransformationClient() else: self.transClient = transClient self.serverURL = self.transClient.getServer() self.exists = False if transID: self.paramValues['TransformationID'] = transID res = self.getTransformation() if res['OK']: self.exists = True elif res['Message'] == 'Transformation does not exist': raise AttributeError( 'TransformationID %d does not exist' % transID ) else: self.paramValues['TransformationID'] = 0 gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID, self.transClient.serverURL ) ) def setServer( self, server ): self.serverURL = server 
self.transClient.setServer( self.serverURL ) def getServer( self ): return self.serverURL def reset( self, transID = 0 ): self.__init__( transID ) self.transClient.setServer( self.serverURL ) return S_OK() def setTargetSE( self, seList ): return self.__setSE( 'TargetSE', seList ) def setSourceSE( self, seList ): return self.__setSE( 'SourceSE', seList ) def __setSE( self, seParam, seList ): if isinstance( seList, basestring ): try: seList = eval( seList ) except: seList = seList.split( ',' ) elif isinstance( seList, ( list, dict, tuple ) ): seList = list( seList ) else: return S_ERROR( "Bad argument type" ) res = self.__checkSEs( seList ) if not res['OK']: return res self.item_called = seParam return self.__setParam( seList ) def __getattr__( self, name ): if name.find( 'get' ) == 0: item = name[3:] self.item_called = item return self.__getParam if name.find( 'set' ) == 0: item = name[3:] self.item_called = item return self.__setParam raise AttributeError( name ) def __getParam( self ): if self.item_called == 'Available': return S_OK( self.paramTypes.keys() ) if self.item_called == 'Parameters': return S_OK( self.paramValues ) if self.item_called in self.paramValues: return S_OK( self.paramValues[self.item_called] ) raise AttributeError( "Unknown parameter for transformation: %s" % self.item_called ) def __setParam( self, value ): change = False if self.item_called in self.paramTypes: if self.paramValues[self.item_called] != value: if type( value ) in self.paramTypes[self.item_called]: change = True else: raise TypeError( "%s %s %s expected one of %s" % ( self.item_called, value, type( value ), self.paramTypes[self.item_called] ) ) else: if self.item_called not in self.paramValues: change = True else: if self.paramValues[self.item_called] != value: change = True if not change: gLogger.verbose( "No change of parameter %s required" % self.item_called ) else: gLogger.verbose( "Parameter %s to be changed" % self.item_called ) transID = 
self.paramValues['TransformationID'] if self.exists and transID: res = self.transClient.setTransformationParameter( transID, self.item_called, value ) if not res['OK']: return res self.paramValues[self.item_called] = value return S_OK() def getTransformation( self, printOutput = False ): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() res = self.transClient.getTransformation( transID, extraParams = True ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res transParams = res['Value'] for paramName, paramValue in transParams.items(): setter = None setterName = "set%s" % paramName if hasattr( self, setterName ) and callable( getattr( self, setterName ) ): setter = getattr( self, setterName ) if not setterName: gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName ) continue setter( paramValue ) if printOutput: gLogger.info( "No printing available yet" ) return S_OK( transParams ) def getTransformationLogging( self, printOutput = False ): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() res = self.transClient.getTransformationLogging( transID ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res loggingList = res['Value'] if printOutput: self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' ) return S_OK( loggingList ) def extendTransformation( self, nTasks, printOutput = False ): return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput ) def cleanTransformation( self, printOutput = False ): res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput ) if res['OK']: self.paramValues['Status'] = 'Cleaned' return res def deleteTransformation( self, printOutput = False ): res = self.__executeOperation( 'deleteTransformation', printOutput = 
printOutput ) if res['OK']: self.reset() return res def addFilesToTransformation( self, lfns, printOutput = False ): return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput ) def setFileStatusForTransformation( self, status, lfns, printOutput = False ): return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput ) def getTransformationTaskStats( self, printOutput = False ): return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput ) def getTransformationStats( self, printOutput = False ): return self.__executeOperation( 'getTransformationStats', printOutput = printOutput ) def deleteTasks( self, taskMin, taskMax, printOutput = False ): return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput ) def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ): return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput ) def setTaskStatus( self, taskID, status, printOutput = False ): return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput ) def __executeOperation( self, operation, *parms, **kwds ): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() printOutput = kwds.pop( 'printOutput' ) fcn = None if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ): fcn = getattr( self.transClient, operation ) if not fcn: return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" ) res = fcn( transID, *parms, **kwds ) if printOutput: self._prettyPrint( res ) return res def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate'], orderBy = 'FileID', printOutput = False ): condDict = 
{'TransformationID':self.paramValues['TransformationID']} if fileStatus: condDict['Status'] = fileStatus if lfns: condDict['LFN'] = lfns res = self.transClient.getTransformationFiles( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) ) elif not res['Value']: gLogger.info( "No tasks found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy ) return res def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID', 'TargetSE', 'CreationTime', 'LastUpdateTime'], orderBy = 'TaskID', printOutput = False ): condDict = {'TransformationID':self.paramValues['TransformationID']} if taskStatus: condDict['ExternalStatus'] = taskStatus if taskIDs: condDict['TaskID'] = taskIDs res = self.transClient.getTransformationTasks( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) ) elif not res['Value']: gLogger.info( "No tasks found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy ) return res ############################################################################# def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate'], orderBy = 'TransformationID', printOutput = False ): condDict = {} if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % 
res['ParameterNames'].join( ' ' ) ) elif not res['Value']: gLogger.info( "No tasks found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy ) return res ############################################################################# def getAuthorDNfromProxy( self ): """ gets the AuthorDN and username of the transformation from the uploaded proxy """ username = "" author = "" res = getProxyInfo() if res['OK']: author = res['Value']['identity'] username = res['Value']['username'] else: gLogger.error( "Unable to get uploaded proxy Info %s " %res['Message'] ) return S_ERROR( res['Message'] ) res = {'username' : username, 'authorDN' : author } return S_OK( res ) ############################################################################# def getTransformationsByUser( self, authorDN = "", userName = "", transID = [], transStatus = [], outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate', 'AuthorDN'], orderBy = 'TransformationID', printOutput = False ): condDict = {} if authorDN == "": res = self.getAuthorDNfromProxy() if not res['OK']: gLogger.error( res['Message'] ) return S_ERROR( res['Message'] ) else: foundUserName = res['Value']['username'] foundAuthor = res['Value']['authorDN'] # If the username whom created the uploaded proxy is different than the provided username report error and exit if not ( userName == "" or userName == foundUserName ): gLogger.error("Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" %(userName, foundUserName)) return S_ERROR("Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" %(userName, foundUserName)) userName = foundUserName authorDN = foundAuthor gLogger.info("Will list transformations created by user '%s' with status '%s'" %(userName, ', '.join( transStatus ))) else: gLogger.info("Will list transformations created by '%s' with status '%s'" %(authorDN, ', 
'.join( transStatus ))) condDict['AuthorDN'] = authorDN if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) ) elif not res['Value']: gLogger.info( "No tasks found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy ) return res ############################################################################# def getSummaryTransformations( self , transID = []): """Show the summary for a list of Transformations Fields starting with 'F' ('J') refers to files (jobs). Proc. stand for processed. """ condDict = { 'TransformationID' : transID } orderby = [] start = 0 maxitems = len(transID) paramShowNames = ['TransformationID','Type','Status','Files_Total','Files_PercentProcessed',\ 'Files_Processed','Files_Unused','Jobs_TotalCreated','Jobs_Waiting',\ 'Jobs_Running','Jobs_Done','Jobs_Failed','Jobs_Stalled'] # Below, the header used for each field in the printing: short to fit in one line paramShowNamesShort = ['TransID','Type','Status','F_Total','F_Proc.(%)','F_Proc.',\ 'F_Unused','J_Created','J_Wait','J_Run','J_Done','J_Fail','J_Stalled'] dictList = [] result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems ) if not result['OK']: self._prettyPrint( result ) return result if result['Value']['TotalRecords'] > 0: try: paramNames = result['Value']['ParameterNames'] for paramValues in result['Value']['Records']: paramShowValues = map(lambda pname: paramValues[ paramNames.index(pname) ], paramShowNames) showDict = dict(zip( paramShowNamesShort, paramShowValues )) dictList.append( showDict ) except Exception, x: print 'Exception %s ' %str(x) if not len(dictList) > 0: gLogger.error( 'No found 
transformations satisfying input condition') return S_ERROR( 'No found transformations satisfying input condition') else: print self._printFormattedDictList( dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0] ) return S_OK( dictList )
class fakeClient:
    """Stand-in for the transformation client, used to dry-run a plugin.

    Requests about ``transID`` are answered from data held in memory (the
    files and replicas prepared by :meth:`prepareForPlugin`); nothing is
    written anywhere.  Requests about any other transformation are forwarded
    to a real ``TransformationClient``.  When ``asIfProd`` is set, requests
    about ``transID`` are redirected to that production in the real client.
    """

    def __init__(self, trans, transID, lfns, asIfProd):
        """Create the real clients and prepare the in-memory file tables.

        :param trans: object providing ``getType()`` and ``getBkQuery()``
        :param transID: ID of the transformation being faked
        :param lfns: list of LFNs given to the plugin as input
        :param asIfProd: production to impersonate, or a false value
        """
        self.trans = trans
        self.transID = transID
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        self.transClient = TransformationClient()
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
        self.bk = BookkeepingClient()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()
        self.asIfProd = asIfProd
        (self.transFiles, self.transReplicas) = self.prepareForPlugin(lfns)

    def addFilesToTransformation(self, transID, lfns):
        """Pretend every LFN was added; nothing is stored."""
        return DIRAC.S_OK({
            'Failed': {},
            'Successful': dict((lfn, 'Added') for lfn in lfns)
        })

    def getTransformation(self, transID, extraParams=False):
        """Return ``{'Type': ...}`` for the faked transformation, else delegate."""
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            # extraParams used to be dropped silently when delegating
            return self.transClient.getTransformation(
                transID, extraParams=extraParams)
        res = self.trans.getType()
        if not res['OK']:
            return res
        return DIRAC.S_OK({'Type': res['Value']})

    def getReplicas(self):
        """Return the replicas prepared by :meth:`prepareForPlugin`."""
        return self.transReplicas

    def getFiles(self):
        """Return the file dictionaries prepared by :meth:`prepareForPlugin`."""
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        """Return zeroed ``UsedSE`` counters for the faked transformation.

        ``condDict`` is updated in place when ``asIfProd`` redirection applies.
        """
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = [
            'CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW', 'SARA-RAW',
            'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW'
        ]
        counters = [({'UsedSE': se}, 0) for se in possibleTargets]
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        """Return the BK query of the impersonated production or of ``trans``."""
        if transID == self.transID and self.asIfProd:
            # Was the bare name ``asIfProd``, which raised NameError
            return self.transClient.getBookkeepingQuery(self.asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        """No-op: always succeeds."""
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        """Return one 'Active' run record per run of the faked transformation.

        Runs come from ``condDict['RunNumber']`` when given, otherwise from
        the bookkeeping metadata of the local files.
        """
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getTransformationRuns(condDict)
        runs = condDict.get('RunNumber', [])
        if not runs and self.transFiles:
            res = self.bk.getFileMetadata(
                [fileDict['LFN'] for fileDict in self.transFiles])
            if not res['OK']:
                return res
            runs = list(
                set(meta['RunNumber']
                    for meta in res['Value']['Successful'].values()))
        transRuns = [{
            'RunNumber': run,
            'Status': "Active",
            "SelectedSite": None
        } for run in runs]
        return DIRAC.S_OK(transRuns)

    def getTransformationFiles(self, condDict=None):
        """Return the local files, all reported as 'Unused'.

        An optional ``RunNumber`` (scalar or list) restricts the selection; a
        ``Status`` selection that does not include 'Unused' yields no file.
        """
        if condDict is None:
            # The declared default used to crash on ``condDict.get``
            condDict = {}
        if condDict.get('TransformationID') == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict.get('TransformationID') != self.transID:
            return self.transClient.getTransformationFiles(condDict=condDict)
        if 'Status' in condDict and 'Unused' not in condDict['Status']:
            return DIRAC.S_OK([])
        runs = None
        if 'RunNumber' in condDict:
            runs = condDict['RunNumber']
            if not isinstance(runs, list):
                runs = [runs]
        # transFiles is None when the client was built without LFNs
        transFiles = [{
            'LFN': fileDict['LFN'],
            'Status': 'Unused',
            'RunNumber': fileDict['RunNumber']
        } for fileDict in (self.transFiles or [])
                      if not runs or fileDict['RunNumber'] in runs]
        return DIRAC.S_OK(transFiles)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """Update the local file dictionaries with the given parameters."""
        if transID != self.transID:
            return self.transClient.setParameterToTransformationFiles(
                transID, lfnDict)
        for fileDict in (self.transFiles or []):
            fileDict.update(lfnDict.get(fileDict['LFN'], {}))
        return DIRAC.S_OK()

    def getTransformationFilesCount(self, transID, field, selection=None):
        """Count local files per 'Status' or per 'RunNumber', plus a 'Total'."""
        if selection is None:
            selection = {}
        isLocal = (transID == self.transID or
                   selection.get('TransformationID') == self.transID)
        if not isLocal:
            return self.transClient.getTransformationFilesCount(
                transID, field, selection=selection)
        runs = selection.get('RunNumber')
        if runs and not isinstance(runs, list):
            runs = [runs]
        selected = [fileDict for fileDict in (self.transFiles or [])
                    if not runs or fileDict['RunNumber'] in runs]
        if field == 'Status':
            counters = {'Unused': len(selected)}
        elif field == 'RunNumber':
            counters = {}
            for fileDict in selected:
                runID = fileDict['RunNumber']
                counters[runID] = counters.get(runID, 0) + 1
        else:
            return DIRAC.S_ERROR('Not implemented for field ' + field)
        counters['Total'] = sum(counters.values())
        return DIRAC.S_OK(counters)

    def getTransformationRunStats(self, transIDs):
        """Return ``{transID: {runID: {'Unused': n, 'Total': n}}}``.

        Statistics of the faked transformation are computed from the local
        files; all other IDs are resolved with a single call to the real
        client (restricted to those IDs).
        """
        counters = {}
        otherIDs = [transID for transID in transIDs
                    if transID != self.transID]
        if otherIDs:
            res = self.transClient.getTransformationRunStats(otherIDs)
            if not res['OK']:
                return res
            counters.update(res['Value'])
        if self.transID in transIDs:
            # Always create the entry: with no local file the old code raised
            # KeyError when reading counters[transID]
            runCounters = counters.setdefault(self.transID, {})
            for fileDict in (self.transFiles or []):
                runStats = runCounters.setdefault(fileDict['RunNumber'],
                                                  {'Unused': 0})
                runStats['Unused'] += 1
            for runStats in runCounters.values():
                runStats['Total'] = runStats['Unused']
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        """Forwarded to the real client."""
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        """Forwarded to the real client."""
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        """No-op: always succeeds."""
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        """No-op: always succeeds."""
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        """No-op: always succeeds."""
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        """No-op: always succeeds."""
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        """No-op: always succeeds."""
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        """Forwarded to the real client."""
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        """Build the file list and replica map handed to the plugin.

        :param lfns: list of LFNs
        :return: ``(files, replicas)`` where ``files`` is a list of
                 ``{'RunNumber': ..., 'LFN': ...}`` and ``replicas`` maps each
                 LFN to its sorted SE list; ``(None, None)`` when ``lfns`` is
                 empty and ``([], {})`` when the bookkeeping query fails.
        """
        import time
        print("Preparing the plugin input data (%d files)" % len(lfns))
        transType = self.trans.getType()['Value']
        if not lfns:
            return (None, None)
        res = self.bk.getFileMetadata(lfns)
        if not res['OK']:
            print("Error getting BK metadata %s" % res['Message'])
            return ([], {})
        files = [{"RunNumber": metadata.get('RunNumber', 0), "LFN": lfn}
                 for lfn, metadata in res['Value']['Successful'].items()]

        replicas = {}
        startTime = time.time()
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        # Replication/removal look at all replicas, other types only at
        # those usable by jobs
        allReplicas = transType.lower() in ("replication", "removal")
        for lfnChunk in breakListIntoChunks(lfns, 200):
            if allReplicas:
                res = self.dm.getReplicas(lfnChunk, getUrl=False)
            else:
                res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False)
            if res['OK']:
                for lfn, ses in res['Value']['Successful'].items():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                # Report the size of the failing chunk, not of the whole list
                print("Error getting replicas of %d files: %s" %
                      (len(lfnChunk), res['Message']))
        print("Obtained replicas of %d files in %.3f seconds" %
              (len(lfns), time.time() - startTime))
        return (files, replicas)
# Runs that are in 'Flush' status in the query (parent) production
flushedRuns = [
    run['RunNumber'] for run in queryRuns if run['Status'] == 'Flush'
]
toBeFlushed = []
# Candidates: runs not flushed in the current production although they are
# flushed in the parent production
candidateRuns = [
    run['RunNumber'] for run in runs
    if run['Status'] != 'Flush' and run['RunNumber'] in flushedRuns
]
for run in candidateRuns:
    # -1 marks "unknown": the run is not flushed if its files can't be read
    missing = -1
    res = transClient.getTransformationFiles({
        'TransformationID': queryProd,
        'RunNumber': run
    })
    if not res['OK']:
        gLogger.fatal("Error getting files for run %d" % run, res['Message'])
        continue
    runFiles = res['Value']
    # Files of the parent production that are still to be processed
    missing = sum(1 for rFile in runFiles
                  if rFile['Status'] in ('Unused', 'Assigned', 'MaxReset'))
    if missing == 0:
        toBeFlushed.append(run)
cc.prod = prod except RuntimeError as e: gLogger.exception(lException=e) continue if fileType and cc.transType in ('Merge', 'MCMerge'): gLogger.notice("It is not allowed to select file type for merging transformation", prod) continue cc.verbose = verbose cc.noFC = noFC cc.descendantsDepth = depth if prod != prodList[0]: gLogger.notice("====================") gLogger.notice("Processing %s production %d" % (cc.transType, cc.prod)) if status: res = tr.getTransformationFiles({'TransformationID': prod, 'Status': status}) if res['OK']: lfnList = [trFile['LFN'] for trFile in res['Value']] gLogger.notice('Found %d files with status %s' % (len(lfnList), status)) else: gLogger.fatal("Error getting files %s" % status, res['Message']) DIRAC.exit(2) if not lfnList: continue cc.lfns = lfnList if not fileType: bkQuery = BKQuery({'Production': prod, 'FileType': 'ALL', 'Visible': 'All'}) cc.fileType = bkQuery.getBKFileTypes() gLogger.notice("Looking for descendants of type %s" % str(cc.fileType)) notAllFileTypes = False
class DataRecoveryAgent( AgentModule ):
    """Agent resetting transformation files whose jobs failed.

    For each eligible transformation it selects the files in 'Assigned' or
    'MaxReset' status, keeps those whose tasks are in 'Failed' state with no
    recent update and no pending request, checks whether the expected outputs
    exist in the catalog and sets the files to 'Processed' (all outputs
    found) or 'Unused' (otherwise).
    """

    def __init__(self, *args, **kwargs):
        """c'tor: forwards everything to AgentModule."""
        AgentModule.__init__(self, *args, **kwargs)
        self.name = 'DataRecoveryAgent'
        self.log = gLogger

    #############################################################################
    def initialize(self):
        """Set defaults and create the clients used by the agent."""
        self.enableFlag = ''  # defined below
        self.replicaManager = ReplicaManager()
        self.prodDB = TransformationClient()
        self.requestClient = RequestClient()
        self.taskIDName = 'TaskID'
        self.externalStatus = 'ExternalStatus'
        self.externalID = 'ExternalID'
        # no stalled jobs are considered so can be frequent
        self.am_setOption('PollingTime', 2 * 60 * 60)
        self.enableFlag = self.am_getOption('EnableFlag', False)
        self.am_setModuleParam("shifterProxy", "ProductionManager")
        self.ops = Operations()
        return S_OK()

    #############################################################################
    def execute(self):
        """The main execution method: loop over the eligible transformations."""
        self.log.info('Enable flag is %s' % self.enableFlag)
        self.removalOKFlag = True

        transformationTypes = ['MCReconstruction', 'MCSimulation',
                               'MCReconstruction_Overlay', 'Merge']
        transformationStatus = ['Active', 'Completing']
        fileSelectionStatus = ['Assigned', 'MaxReset']
        updateStatus = 'Unused'
        wmsStatusList = ['Failed']
        # only worry about tasks not updated since "Delay" hours (default 2)
        selectDelay = self.am_getOption("Delay", 2)
        typesText = ', '.join(transformationTypes)

        transformationDict = {}
        for transStatus in transformationStatus:
            result = self.getEligibleTransformations(transStatus, transformationTypes)
            if not result['OK']:
                self.log.error(result)
                return S_ERROR('Could not obtain eligible transformations for status "%s"' % (transStatus))
            if not result['Value']:
                self.log.info('No "%s" transformations of types %s to process.' % (transStatus, typesText))
                continue
            transformationDict.update(result['Value'])

        self.log.info('Selected %s transformations of types %s' % (len(transformationDict), typesText))
        self.log.verbose('The following transformations were selected out of %s:\n%s'
                         % (typesText, ', '.join(transformationDict)))

        # Debugging aid: when non-empty, only these transformations are treated
        trans = []
        # initially this was useful for restricting the considered list
        # now we use the DataRecoveryAgent in setups where IDs are low
        ignoreLessThan = self.ops.getValue("Transformations/IgnoreLessThan", '724')

        if trans:
            self.log.info('Skipping all transformations except %s' % (', '.join(trans)))

        for transformation, typeName in transformationDict.items():
            if trans and transformation not in trans:
                continue
            if ignoreLessThan and int(ignoreLessThan) > int(transformation):
                self.log.verbose('Ignoring transformation %s ( is less than specified limit %s )'
                                 % (transformation, ignoreLessThan))
                continue
            self._recoverTransformation(transformation, typeName, fileSelectionStatus,
                                        selectDelay, wmsStatusList, updateStatus)

        return S_OK()

    def _recoverTransformation(self, transformation, typeName, fileSelectionStatus,
                               selectDelay, wmsStatusList, updateStatus):
        """Run one recovery pass for a single transformation.

        Problems are logged and end the pass for this transformation only.
        """
        self.log.info('=' * len('Looking at transformation %s type %s:' % (transformation, typeName)))
        self.log.info('Looking at transformation %s:' % (transformation))

        result = self.selectTransformationFiles(transformation, fileSelectionStatus)
        if not result['OK']:
            self.log.error(result)
            self.log.error('Could not select files for transformation %s' % transformation)
            return
        if not result['Value']:
            self.log.info('No files in status %s selected for transformation %s'
                          % (', '.join(fileSelectionStatus), transformation))
            return
        fileDict = result['Value']

        result = self.obtainWMSJobIDs(transformation, fileDict, selectDelay, wmsStatusList)
        if not result['OK']:
            self.log.error(result)
            self.log.error('Could not obtain WMS jobIDs for files of transformation %s' % (transformation))
            return
        if not result['Value']:
            self.log.info('No eligible WMS jobIDs found for %s files in list:\n%s ...'
                          % (len(fileDict), next(iter(fileDict))))
            return
        jobFileDict = result['Value']

        fileCount = sum(len(lfnList) for lfnList in jobFileDict.values())
        if not fileCount:
            self.log.info('No files were selected for transformation %s after examining WMS jobs.'
                          % transformation)
            return
        self.log.info('%s files are selected after examining related WMS jobs' % (fileCount))

        result = self.checkOutstandingRequests(jobFileDict)
        if not result['OK']:
            self.log.error(result)
            return
        if not result['Value']:
            self.log.info('No WMS jobs without pending requests to process.')
            return
        jobFileNoRequestsDict = result['Value']

        fileCount = sum(len(lfnList) for lfnList in jobFileNoRequestsDict.values())
        self.log.info('%s files are selected after removing any relating to jobs with pending requests'
                      % (fileCount))

        result = self.checkDescendents(transformation, fileDict, jobFileNoRequestsDict)
        if not result['OK']:
            self.log.error(result)
            return
        filesToUpdateUnused = list(result['Value']['filesToMarkUnused'])
        filesToUpdateProcessed = list(result['Value']['filesprocessed'])
        self.log.info('====> Transformation %s total files that can be updated now: %s'
                      % (transformation, len(filesToUpdateUnused)))

        if filesToUpdateUnused:
            result = self.updateFileStatus(transformation, filesToUpdateUnused, updateStatus)
            if not result['OK']:
                self.log.error('Recoverable files were not updated with result:\n%s' % (result['Message']))
                return
        else:
            self.log.info('There are no files with failed jobs to update for production %s in this cycle'
                          % transformation)

        if filesToUpdateProcessed:
            result = self.updateFileStatus(transformation, filesToUpdateProcessed, 'Processed')
            if not result['OK']:
                self.log.error('Recoverable files were not updated with result:\n%s' % (result['Message']))
                return
        else:
            self.log.info('There are no files processed to update for production %s in this cycle'
                          % transformation)

    #############################################################################
    def getEligibleTransformations(self, status, typeList):
        """Select transformations of given status and type.

        :return: S_OK( { str(transformationID) : type } )
        """
        res = self.prodDB.getTransformations(condDict={'Status': status, 'Type': typeList})
        self.log.debug(res)
        if not res['OK']:
            return res
        transformations = dict((str(prod['TransformationID']), prod['Type'])
                               for prod in res['Value'])
        return S_OK(transformations)

    #############################################################################
    def selectTransformationFiles(self, transformation, statusList):
        """Select files, production jobIDs in specified file status for a given transformation.

        :return: S_OK( { lfn : taskID } )
        """
        # Until a query for files with timestamp can be obtained must rely on the
        # WMS job last update
        res = self.prodDB.getTransformationFiles(condDict={'TransformationID': transformation,
                                                           'Status': statusList})
        self.log.debug(res)
        if not res['OK']:
            return res
        mandatory = ('LFN', self.taskIDName, 'LastUpdate')
        resDict = {}
        for fileDict in res['Value']:
            if not all(key in fileDict for key in mandatory):
                self.log.info('LFN, %s and LastUpdate are mandatory, >=1 are missing for:\n%s'
                              % (self.taskIDName, fileDict))
                continue
            resDict[fileDict['LFN']] = fileDict[self.taskIDName]
        if resDict:
            self.log.info('Selected %s files overall for transformation %s' % (len(resDict), transformation))
        return S_OK(resDict)

    #############################################################################
    def obtainWMSJobIDs(self, transformation, fileDict, selectDelay, wmsStatusList):
        """Group files by the corresponding WMS jobIDs, check the corresponding
        jobs have not been updated for the delay time.  Can't get into any
        mess because we start from files only in MaxReset / Assigned and check
        corresponding jobs.  Mixtures of files for jobs in MaxReset and Assigned
        statuses only possibly include some files in Unused status (not
        Processed for example) that will not be touched.

        :param fileDict: { lfn : taskID } as returned by selectTransformationFiles
        :param selectDelay: minimum age in hours of the last task update
        :param wmsStatusList: task external statuses that are eligible
        :return: S_OK( { wmsID : [ lfns ] } )
        """
        prodJobIDs = uniqueElements(list(fileDict.values()))
        self.log.info('The following %s production jobIDs apply to the selected files:\n%s'
                      % (len(prodJobIDs), prodJobIDs))

        jobFileDict = {}
        condDict = {'TransformationID': transformation, self.taskIDName: prodJobIDs}
        olderThan = dateTime() - datetime.timedelta(hours=selectDelay)

        res = self.prodDB.getTransformationTasks(condDict=condDict, older=olderThan,
                                                 timeStamp='LastUpdateTime', inputVector=True)
        self.log.debug(res)
        if not res['OK']:
            # The message used to be logged with its %s placeholder unfilled
            self.log.error('getTransformationTasks returned an error:\n%s' % res['Message'])
            return res

        mandatory = [self.taskIDName, self.externalID, 'LastUpdateTime', self.externalStatus, 'InputVector']
        for jobDict in res['Value']:
            missingKeys = [key for key in mandatory if key not in jobDict]
            for key in missingKeys:
                self.log.info('Missing key %s for job dictionary, the following is available:\n%s'
                              % (key, jobDict))
            if missingKeys:
                continue

            job = jobDict[self.taskIDName]
            wmsID = jobDict[self.externalID]
            lastUpdate = jobDict['LastUpdateTime']
            wmsStatus = jobDict[self.externalStatus]

            # A task without a usable (non-zero, numeric) WMS ID was never submitted
            try:
                hasWMSJob = bool(int(wmsID))
            except (TypeError, ValueError):
                hasWMSJob = False
            if not hasWMSJob:
                self.log.info('Prod job %s status is %s (ID = %s) so will not recheck with WMS'
                              % (job, wmsStatus, wmsID))
                continue

            self.log.info('Job %s, prod job %s last update %s, production management system status %s'
                          % (wmsID, job, lastUpdate, wmsStatus))
            # Exclude jobs not having appropriate WMS status - have to trust that
            # production management status is correct
            if wmsStatus not in wmsStatusList:
                self.log.info('Job %s is in status %s, not %s so will be ignored'
                              % (wmsID, wmsStatus, ', '.join(wmsStatusList)))
                continue

            # Must map unique files -> jobs in expected state
            finalJobData = [lfn for lfn, prodID in fileDict.items() if int(prodID) == int(job)]
            self.log.info('Found %s files for job %s' % (len(finalJobData), job))
            jobFileDict[wmsID] = finalJobData

        return S_OK(jobFileDict)

    #############################################################################
    def checkOutstandingRequests(self, jobFileDict):
        """Before doing anything check that no outstanding requests are pending
        for the set of WMS jobIDs.

        Jobs with a pending request are removed from ``jobFileDict`` in place.

        :return: S_OK( jobFileDict ) without the jobs having pending requests
        """
        jobs = list(jobFileDict)
        result = self.requestClient.getRequestForJobs(jobs)
        if not result['OK']:
            return result

        if not result['Value']:
            self.log.info('None of the jobs have pending requests')
            return S_OK(jobFileDict)

        for jobID in list(result['Value']):
            # The reply may use another key type than jobFileDict; a plain
            # ``del jobFileDict[str(jobID)]`` raised KeyError in that case
            jobFileDict.pop(str(jobID), None)
            jobFileDict.pop(jobID, None)
            self.log.info('Removing jobID %s from consideration until requests are completed' % (jobID))

        return S_OK(jobFileDict)

    ############################################################################
    def checkDescendents(self, transformation, filedict, jobFileDict):
        """Check for each input file whether the expected outputs of its task
        exist in the catalog.

        :param filedict: { lfn : taskID }
        :param jobFileDict: { wmsID : [ lfns ] }; not modified
        :return: S_OK( { 'filesprocessed' : [ lfns with all outputs present ],
                         'filesToMarkUnused' : [ other checked lfns ] } )
                 Files whose outputs could not be checked are in neither list.
        """
        res = self.prodDB.getTransformationParameters(transformation, ['Body'])
        if not res['OK']:
            self.log.error('Could not get Body from TransformationDB')
            return res
        body = res['Value']
        workflow = fromXMLString(body)
        workflow.resolveGlobalVars()

        jtype = workflow.findParameter('JobType')
        if not jtype:
            # transformation is a string key here: %d raised TypeError
            self.log.error('Type for transformation %s was not defined' % transformation)
            return S_ERROR('Type for transformation %s was not defined' % transformation)

        olist = []
        for step in workflow.step_instances:
            param = step.findParameter('listoutput')
            if not param:
                continue
            olist.extend(param.value)

        # get the tasks that need to be checked
        tasks_to_be_checked = {}
        for lfnList in jobFileDict.values():
            for lfn in lfnList:
                if lfn in filedict:
                    tasks_to_be_checked[lfn] = filedict[lfn]

        checkedFiles = []
        fileprocessed = []
        for filep, task in tasks_to_be_checked.items():
            commons = {'outputList': olist,
                       'PRODUCTION_ID': transformation,
                       'JOB_ID': task,
                       'JobType': jtype}
            out = constructProductionLFNs(commons)
            if not out['OK']:
                self.log.error('Could not construct the expected output LFNs')
                continue
            expectedlfns = out['Value']['ProductionOutputData']
            res = self.replicaManager.getCatalogFileMetadata(expectedlfns)
            if not res['OK']:
                self.log.error('Getting metadata failed')
                continue
            checkedFiles.append(filep)
            # Processed only if every expected output is registered
            if res['Value']['Successful'] and not res['Value']['Failed']:
                fileprocessed.append(filep)

        final_list_unused = [lfn for lfn in checkedFiles if lfn not in fileprocessed]

        result = {'filesprocessed': fileprocessed, 'filesToMarkUnused': final_list_unused}
        return S_OK(result)

    #############################################################################
    def updateFileStatus(self, transformation, fileList, fileStatus):
        """Update file list to specified status (only logs when not enabled)."""
        if not self.enableFlag:
            self.log.info('Enable flag is False, would update  %s files to "%s" status for %s'
                          % (len(fileList), fileStatus, transformation))
            return S_OK()

        self.log.info('Updating %s files to "%s" status for %s' % (len(fileList), fileStatus, transformation))
        result = self.prodDB.setFileStatusForTransformation(int(transformation), fileStatus,
                                                            fileList, force=True)
        self.log.debug(result)
        if not result['OK']:
            self.log.error(result)
            return result
        if result['Value']['Failed']:
            self.log.error(result['Value']['Failed'])
            return result

        for lfn, message in result['Value']['Successful'].items():
            self.log.info('%s => %s' % (lfn, message))

        return S_OK()
class TransformationAgent(AgentModule):
    """Agent turning the 'Unused' files of transformations into tasks.

    For every selected transformation the input files and their replicas are
    handed to a ``TransformationPlugin`` object, and one task is created per
    (SE, LFN list) pair the plugin returns.
    """

    def initialize(self):
        """Standard init: read the options and create the clients."""
        self.pluginLocation = self.am_getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.checkCatalog = self.am_getOption('CheckCatalog', 'yes')
        self.transformationStatus = self.am_getOption(
            'transformationStatus', ['Active', 'Completing', 'Flush'])
        self.maxFiles = self.am_getOption('MaxFiles', 5000)
        self.am_setOption('shifterProxy', 'ProductionManager')
        self.transDB = TransformationClient('TransformationDB')
        self.rm = ReplicaManager()
        # transID -> number of 'Unused' files left over by the previous cycle
        self.unusedFiles = {}
        return S_OK()

    def execute(self):
        """Get and process the transformations to be processed.

        Failures are logged; the cycle itself always returns S_OK.
        """
        res = self.getTransformations()
        if not res['OK']:
            gLogger.info("execute: Failed to obtain transformations: %s" %
                         res['Message'])
            return S_OK()
        # Process the transformations
        for transDict in res['Value']:
            transID = int(transDict['TransformationID'])
            gLogger.info("execute: Processing transformation %s." % transID)
            startTime = time.time()
            res = self.processTransformation(transDict)
            if not res['OK']:
                gLogger.info("execute: Failed to process transformation: %s" %
                             res['Message'])
            else:
                gLogger.info(
                    "execute: Processed transformation in %.1f seconds" %
                    (time.time() - startTime))
        return S_OK()

    def getTransformations(self):
        """Obtain the transformations to be executed.

        With the 'Transformation' option left to 'All', every transformation
        in one of the configured statuses is returned; otherwise only the
        named one.

        :return: S_OK( list of transformation dictionaries )
        """
        transName = self.am_getOption('Transformation', 'All')
        if transName == 'All':
            gLogger.info(
                "getTransformations: Initializing general purpose agent.")
            res = self.transDB.getTransformations(
                {'Status': self.transformationStatus}, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "getTransformations: Failed to get transformations: %s" %
                    res['Message'])
                return res
            transformations = res['Value']
            gLogger.info(
                "getTransformations: Obtained %d transformations to process" %
                len(transformations))
        else:
            gLogger.info(
                "getTransformations: Initializing for transformation %s." %
                transName)
            res = self.transDB.getTransformation(transName, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "getTransformations: Failed to get transformation: %s." %
                    res['Message'])
                return res
            transformations = [res['Value']]
        return S_OK(transformations)

    def _setFlushedToActive(self, transID):
        """Put a flushed transformation back to 'Active'; problems are only logged."""
        res = self.transDB.setTransformationParameter(transID, 'Status',
                                                      'Active')
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to update transformation status to 'Active': %s."
                % res['Message'])
        else:
            gLogger.info(
                "processTransformation: Updated transformation status to 'Active'."
            )

    def processTransformation(self, transDict):
        """Create the tasks of one transformation from its 'Unused' files.

        :param transDict: transformation dictionary; 'TransformationID',
                          'Status' and 'Type' are read, 'Plugin' is optional
        :return: S_OK() or the S_ERROR of the failing step
        """
        transID = transDict['TransformationID']
        # First get the LFNs associated to the transformation
        res = self.transDB.getTransformationFiles(condDict={
            'TransformationID': transID,
            'Status': 'Unused'
        })
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to obtain input data: %s." %
                res['Message'])
            return res
        transFiles = res['Value']
        lfns = res['LFNs']
        if not lfns:
            gLogger.info(
                "processTransformation: No 'Unused' files found for transformation."
            )
            if transDict['Status'] == 'Flush':
                self._setFlushedToActive(transID)
            return S_OK()

        # Check if something new happened
        if len(lfns) == self.unusedFiles.get(
                transID, 0) and transDict['Status'] != 'Flush':
            gLogger.info(
                "processTransformation: No new 'Unused' files found for transformation."
            )
            return S_OK()

        replicateOrRemove = transDict['Type'].lower() in [
            "replication", "removal"
        ]
        # Limit the number of LFNs to be considered for replication or removal
        # as they are treated individually
        if replicateOrRemove:
            # Was lfns[0:self.maxFiles - 1], i.e. one file short of the limit
            lfns = lfns[:self.maxFiles]
        unusedFiles = len(lfns)

        # Check the data is available with replicas
        # NOTE(review): __getDataReplicas is not defined in the part of the
        # class visible here - confirm it exists in the full file
        res = self.__getDataReplicas(transID,
                                     lfns,
                                     active=not replicateOrRemove)
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to get data replicas: %s" %
                res['Message'])
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        plugin = transDict.get('Plugin') or 'Standard'
        gLogger.info(
            "processTransformation: Processing transformation with '%s' plug-in."
            % plugin)
        res = self.__generatePluginObject(plugin)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.generateTasks()
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to generate tasks for transformation: %s"
                % res['Message'])
            return res
        tasks = res['Value']

        # Create the tasks
        allCreated = True
        created = 0
        for se, taskLfns in tasks:
            res = self.transDB.addTaskForTransformation(
                transID, taskLfns, se)
            if not res['OK']:
                gLogger.error(
                    "processTransformation: Failed to add task generated by plug-in: %s."
                    % res['Message'])
                allCreated = False
            else:
                created += 1
                unusedFiles -= len(taskLfns)
        if created:
            gLogger.info(
                "processTransformation: Successfully created %d tasks for transformation."
                % created)
        # Remember what is left so an unchanged count can be skipped next cycle
        self.unusedFiles[transID] = unusedFiles

        # If this production is to Flush
        if transDict['Status'] == 'Flush' and allCreated:
            self._setFlushedToActive(transID)
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def __generatePluginObject(self, plugin):
        """Instantiate the TransformationPlugin class with the relevant plugin name.

        :return: S_OK( plugin object ) or S_ERROR( message )
        """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(),
                                    ['TransformationPlugin'])
        except ImportError as e:
            gLogger.exception(
                "__generatePluginObject: Failed to import 'TransformationPlugin' %s: %s"
                % (plugin, e))
            # Callers format res['Message'], so always provide one
            return S_ERROR("Failed to import 'TransformationPlugin' %s: %s" %
                           (plugin, e))
        try:
            plugin_o = getattr(plugModule, 'TransformationPlugin')(
                '%s' % plugin, transClient=self.transDB, replicaManager=self.rm)
            return S_OK(plugin_o)
        except AttributeError as e:
            gLogger.exception(
                "__generatePluginObject: Failed to create %s(): %s." %
                (plugin, e))
            return S_ERROR("Failed to create %s(): %s." % (plugin, e))
class TaskManagerAgentBase( AgentModule ):
  ''' Base class for agents that submit transformation tasks to an external
      system and keep the task and file statuses up to date.

      To be extended. The extension needs to:
      - provide a taskManager object as data member
      - provide a shifterProxy (string) as data member
      - provide a transType (list of strings) as data member
  '''

  def __init__( self, *args, **kwargs ):
    ''' c'tor

        taskManager, shifterProxy and transType are created empty here;
        initialize() returns S_ERROR as long as any of them is still empty.
    '''
    AgentModule.__init__( self, *args, **kwargs )

    self.taskManager = None
    self.shifterProxy = ''
    self.transClient = TransformationClient()
    self.transType = []

  #############################################################################

  def initialize( self ):
    ''' agent initialization

        Checks that the extension provided a task manager, a shifter proxy and
        the transformation types, stores the shifter proxy as agent option and
        registers the "SubmittedTasks" monitoring activity.

        :return: S_OK() or S_ERROR( message ) when a data member is missing
    '''
    if not self.taskManager:
      return S_ERROR( 'No task manager provided!' )

    if not self.shifterProxy:
      return S_ERROR( 'No shifter proxy provided!' )
    self.am_setOption( 'shifterProxy', self.shifterProxy )

    if not self.transType:
      return S_ERROR( 'No transformation types to look for!' )
    gLogger.info( "Looking for %s" % self.transType )

    gMonitor.registerActivity( "SubmittedTasks", "Automatically submitted tasks",
                               "Transformation Monitoring", "Tasks", gMonitor.OP_ACUM )
    return S_OK()

  #############################################################################

  def execute( self ):
    ''' The TaskManagerBase execution method.

        Runs, in this order, the task status update, the file status update,
        the check of reserved tasks and the task submission. A step only runs
        when its agent option evaluates to true; a failing step is logged as a
        warning and does not prevent the following steps from running.

        :return: S_OK() in all cases
    '''
    # ( agent option, what the option enables, step to run, warning if the step fails )
    steps = ( ( 'MonitorTasks', 'Monitoring of tasks',
                self.updateTaskStatus, 'execute: Failed to update task states' ),
              ( 'MonitorFiles', 'Monitoring of files',
                self.updateFileStatus, 'execute: Failed to update file states' ),
              ( 'CheckReserved', 'Checking of reserved tasks',
                self.checkReservedTasks, 'execute: Failed to checked reserved tasks' ),
              ( 'SubmitTasks', 'Submission of tasks',
                self.submitTasks, 'execute: Failed to submit created tasks' ) )

    for option, description, step, failureMessage in steps:
      if not self.am_getOption( option, '' ):
        gLogger.info( "execute: %s is disabled." % description )
        gLogger.info( "execute: To enable create the '%s' option" % option )
        continue
      res = step()
      if not res['OK']:
        gLogger.warn( failureMessage, res['Message'] )

    return S_OK()

  def _selectTransformations( self, transType = [], status = ['Active', 'Completing'], agentType = ['Automatic'] ):
    ''' get the transformations

        Each non-empty argument becomes a selection condition ( 'Type',
        'Status', 'AgentType' ); an empty one is not used for the selection.
        The default lists are shared between calls and are never modified here.

        :return: the result of TransformationClient.getTransformations, unchanged
    '''
    selectCond = {}
    if status:
      selectCond['Status'] = status
    if transType:
      selectCond['Type'] = transType
    if agentType:
      selectCond['AgentType'] = agentType

    res = self.transClient.getTransformations( condDict = selectCond )
    if not res['OK']:
      gLogger.error( "_selectTransformations: Failed to get transformations for selection.", res['Message'] )
    elif not res['Value']:
      gLogger.info( "_selectTransformations: No transformations found for selection." )
    else:
      gLogger.info( "_selectTransformations: Obtained %d transformations for selection" % len( res['Value'] ) )
    return res

  def updateTaskStatus( self ):
    ''' Updates the task status

        For every selected transformation, the tasks whose external status is
        one of the 'TaskUpdateStatus' option values are passed to the task
        manager, and the statuses it returns are stored through the
        transformation client. A failure for one transformation is logged and
        the loop moves on to the next one.

        :return: S_OK(), or the failed result of the transformation selection
    '''
    gLogger.info( "updateTaskStatus: Updating the Status of tasks" )

    # Get the transformations to be updated
    status = self.am_getOption( 'UpdateTasksStatus', ['Active', 'Completing', 'Stopped'] )
    res = self._selectTransformations( transType = self.transType, status = status, agentType = [] )
    if not res['OK']:
      return res

    for transformation in res['Value']:
      transID = transformation['TransformationID']

      # Get the tasks which are in a UPDATE state
      updateStatus = self.am_getOption( 'TaskUpdateStatus', ['Checking', 'Deleted', 'Killed', 'Staging',
                                                             'Stalled', 'Matched', 'Rescheduled', 'Completed',
                                                             'Submitted', 'Received', 'Waiting', 'Running'] )
      condDict = {"TransformationID":transID, "ExternalStatus":updateStatus}
      # Only tasks whose LastUpdateTime is older than 10 minutes are requested
      timeStamp = str( datetime.datetime.utcnow() - datetime.timedelta( minutes = 10 ) )
      res = self.transClient.getTransformationTasks( condDict = condDict,
                                                     older = timeStamp,
                                                     timeStamp = 'LastUpdateTime' )
      if not res['OK']:
        gLogger.error( "updateTaskStatus: Failed to get tasks to update for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      if not res['Value']:
        gLogger.verbose( "updateTaskStatus: No tasks found to update for transformation %s" % transID )
        continue

      res = self.taskManager.getSubmittedTaskStatus( res['Value'] )
      if not res['OK']:
        gLogger.error( "updateTaskStatus: Failed to get updated task statuses for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue

      statusDict = res['Value']
      # taskStatus is deliberately not called 'status': that name holds the selection statuses above
      for taskStatus in sortList( statusDict.keys() ):
        taskIDs = statusDict[taskStatus]
        gLogger.info( "updateTaskStatus: Updating %d task(s) from transformation %d to %s" % ( len( taskIDs ),
                                                                                                transID,
                                                                                                taskStatus ) )
        res = self.transClient.setTaskStatus( transID, taskIDs, taskStatus )
        if not res['OK']:
          gLogger.error( "updateTaskStatus: Failed to update task status for transformation",
                         "%s %s" % ( transID, res['Message'] ) )

    gLogger.info( "updateTaskStatus: Transformation task status update complete" )
    return S_OK()

  def updateFileStatus( self ):
    ''' Update the files status

        For every selected transformation, the 'Assigned' files are passed to
        the task manager and the statuses it returns are committed through a
        FileReport. A failure for one transformation is logged and the loop
        moves on to the next one.

        :return: S_OK(), or the failed result of the transformation selection
    '''
    gLogger.info( "updateFileStatus: Updating Status of task files" )

    # Get the transformations to be updated
    status = self.am_getOption( 'UpdateFilesStatus', ['Active', 'Completing', 'Stopped'] )
    res = self._selectTransformations( transType = self.transType, status = status, agentType = [] )
    if not res['OK']:
      return res

    for transformation in res['Value']:
      transID = transformation['TransformationID']

      # Only files whose LastUpdate is older than 10 minutes are requested
      timeStamp = str( datetime.datetime.utcnow() - datetime.timedelta( minutes = 10 ) )
      condDict = {'TransformationID' : transID, 'Status' : ['Assigned']}
      res = self.transClient.getTransformationFiles( condDict = condDict,
                                                     older = timeStamp,
                                                     timeStamp = 'LastUpdate' )
      if not res['OK']:
        gLogger.error( "updateFileStatus: Failed to get transformation files to update.", res['Message'] )
        continue
      if not res['Value']:
        gLogger.info( "updateFileStatus: No files to be updated for transformation %s." % transID )
        continue

      res = self.taskManager.getSubmittedFileStatus( res['Value'] )
      if not res['OK']:
        gLogger.error( "updateFileStatus: Failed to get updated file statuses for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      statusDict = res['Value']
      if not statusDict:
        gLogger.info( "updateFileStatus: No file statuses to be updated for transformation %s." % transID )
        continue

      # Collect all the new statuses and send them in one commit
      fileReport = FileReport( server = self.transClient.getServer() )
      for lfn, fileStatus in statusDict.items():
        fileReport.setFileStatus( int( transID ), lfn, fileStatus )
      res = fileReport.commit()
      if not res['OK']:
        gLogger.error( "updateFileStatus: Failed to update file status for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
      else:
        for fileStatus, update in res['Value'].items():
          gLogger.info( "updateFileStatus: Updated %s files for %s to %s." % ( update, transID, fileStatus ) )

    gLogger.info( "updateFileStatus: Transformation file status update complete" )
    return S_OK()

  def checkReservedTasks( self ):
    ''' Checks the tasks left in 'Reserved' external status

        For every selected transformation, the 'Reserved' tasks are passed to
        the task manager. Tasks it reports under 'NoTasks' are set back to
        'Created'; tasks it reports under 'TaskNameIDs' are set to 'Submitted'
        together with the external ID found for them.

        :return: S_OK(), or the failed result of the transformation selection
    '''
    gLogger.info( "checkReservedTasks: Checking Reserved tasks" )

    # Get the transformations which should be checked
    status = self.am_getOption( 'CheckReservedStatus', ['Active', 'Completing', 'Stopped'] )
    res = self._selectTransformations( transType = self.transType, status = status, agentType = [] )
    if not res['OK']:
      return res

    for transformation in res['Value']:
      transID = transformation['TransformationID']

      # Select the tasks which have been in Reserved status for more than 1 hour
      # (and less than 7 days) for selected transformations
      condDict = {"TransformationID":transID, "ExternalStatus":'Reserved'}
      time_stamp_older = str( datetime.datetime.utcnow() - datetime.timedelta( hours = 1 ) )
      time_stamp_newer = str( datetime.datetime.utcnow() - datetime.timedelta( days = 7 ) )
      res = self.transClient.getTransformationTasks( condDict = condDict,
                                                     older = time_stamp_older,
                                                     newer = time_stamp_newer )
      if not res['OK']:
        gLogger.error( "checkReservedTasks: Failed to get Reserved tasks for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      if not res['Value']:
        gLogger.verbose( "checkReservedTasks: No Reserved tasks found for transformation %s" % transID )
        continue

      res = self.taskManager.updateTransformationReservedTasks( res['Value'] )
      if not res['OK']:
        # This is a failure of the task manager, not an empty selection: report it with its reason
        gLogger.error( "checkReservedTasks: Failed to update reserved tasks for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      noTasks = res['Value']['NoTasks']
      taskNameIDs = res['Value']['TaskNameIDs']

      # For the tasks with no associated request found re-set the status of the task in the transformationDB
      for taskName in noTasks:
        # The task name carries its own IDs; kept apart from the transID of the outer loop
        taskTransID, taskID = taskName.split( '_' )
        gLogger.info( "checkReservedTasks: Resetting status of %s to Created as no associated task found" % ( taskName ) )
        res = self.transClient.setTaskStatus( int( taskTransID ), int( taskID ), 'Created' )
        if not res['OK']:
          gLogger.warn( "checkReservedTasks: Failed to update task status and ID after recovery",
                        "%s %s" % ( taskName, res['Message'] ) )

      # For the tasks for which an associated request was found update the task details in the transformationDB
      for taskName, extTaskID in taskNameIDs.items():
        taskTransID, taskID = taskName.split( '_' )
        gLogger.info( "checkReservedTasks: Setting status of %s to Submitted with ID %s" % ( taskName, extTaskID ) )
        res = self.transClient.setTaskStatusAndWmsID( int( taskTransID ), int( taskID ), 'Submitted', str( extTaskID ) )
        if not res['OK']:
          gLogger.warn( "checkReservedTasks: Failed to update task status and ID after recovery",
                        "%s %s" % ( taskName, res['Message'] ) )

    gLogger.info( "checkReservedTasks: Updating of reserved tasks complete" )
    return S_OK()

  def submitTasks( self ):
    ''' Submit the tasks to an external system, using the taskManager provided

        The owner and owner group are taken from the current proxy. For every
        selected transformation, up to 'TasksPerLoop' tasks are fetched,
        prepared, submitted and the DB is updated; a failure at any of these
        stages is logged and the loop moves on to the next transformation.

        :return: S_OK(), or the failed result of the proxy lookup or of the
                 transformation selection
    '''
    gLogger.info( "submitTasks: Submitting tasks for transformations" )

    res = getProxyInfo( False, False )
    if not res['OK']:
      gLogger.error( "submitTasks: Failed to determine credentials for submission", res['Message'] )
      return res
    proxyInfo = res['Value']
    owner = proxyInfo['username']
    ownerGroup = proxyInfo['group']
    gLogger.info( "submitTasks: Tasks will be submitted with the credentials %s:%s" % ( owner, ownerGroup ) )

    # Get the transformations which should be submitted
    tasksPerLoop = self.am_getOption( 'TasksPerLoop', 50 )
    status = self.am_getOption( 'SubmitStatus', ['Active', 'Completing'] )
    res = self._selectTransformations( transType = self.transType, status = status )
    if not res['OK']:
      return res

    for transformation in res['Value']:
      transID = transformation['TransformationID']
      transBody = transformation['Body']

      res = self.transClient.getTasksToSubmit( transID, tasksPerLoop )
      if not res['OK']:
        gLogger.error( "submitTasks: Failed to obtain tasks for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue
      tasks = res['Value']['JobDictionary']
      if not tasks:
        gLogger.verbose( "submitTasks: No tasks found for submission for transformation %s" % transID )
        continue
      gLogger.info( "submitTasks: Obtained %d tasks for submission for transformation %s" % ( len( tasks ), transID ) )

      res = self.taskManager.prepareTransformationTasks( transBody, tasks, owner, ownerGroup )
      if not res['OK']:
        gLogger.error( "submitTasks: Failed to prepare tasks for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue

      res = self.taskManager.submitTransformationTasks( res['Value'] )
      if not res['OK']:
        gLogger.error( "submitTasks: Failed to submit prepared tasks for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue

      res = self.taskManager.updateDBAfterTaskSubmission( res['Value'] )
      if not res['OK']:
        gLogger.error( "submitTasks: Failed to update DB after task submission for transformation",
                       "%s %s" % ( transID, res['Message'] ) )
        continue

    gLogger.info( "submitTasks: Submission of transformation tasks complete" )
    return S_OK()
class Transformation(API):
  """ Client-side handle on a single transformation.

      The transformation parameters are held locally in ``paramValues``; once
      the transformation is known to exist (``self.exists``), parameter changes
      and operations are forwarded to the TransformationClient.

      Any attribute named ``get<Param>`` or ``set<Param>`` that is not a real
      method is resolved by ``__getattr__`` into the generic getter or setter
      for the parameter ``<Param>``.
  """

  #############################################################################
  def __init__(self, transID=0, transClient=None):
    """ c'tor

        :param transID: ID of an existing transformation to load; a false
                        value gives an empty, not yet existing transformation
        :param transClient: client to use; a TransformationClient is created
                            when none is given
        :raises AttributeError: when the service answers
                                'Transformation does not exist' for transID
    """
    super(Transformation, self).__init__()

    # Accepted types for the parameters that are type-checked by __setParam
    self.paramTypes = {
        'TransformationID': [types.IntType, types.LongType],
        'TransformationName': types.StringTypes,
        'Status': types.StringTypes,
        'Description': types.StringTypes,
        'LongDescription': types.StringTypes,
        'Type': types.StringTypes,
        'Plugin': types.StringTypes,
        'AgentType': types.StringTypes,
        'FileMask': types.StringTypes,
        'TransformationGroup': types.StringTypes,
        'GroupSize': [types.IntType, types.LongType, types.FloatType],
        'InheritedFrom': [types.IntType, types.LongType],
        'Body': types.StringTypes,
        'MaxNumberOfTasks': [types.IntType, types.LongType],
        'EventsPerTask': [types.IntType, types.LongType]
    }
    # Current (local) values of the parameters, with their defaults
    self.paramValues = {
        'TransformationID': 0,
        'TransformationName': '',
        'Status': 'New',
        'Description': '',
        'LongDescription': '',
        'Type': '',
        'Plugin': 'Standard',
        'AgentType': 'Manual',
        'FileMask': '',
        'TransformationGroup': 'General',
        'GroupSize': 1,
        'InheritedFrom': 0,
        'Body': '',
        'MaxNumberOfTasks': 0,
        'EventsPerTask': 0
    }
    self.ops = Operations()
    self.supportedPlugins = self.ops.getValue('Transformations/AllowedPlugins',
                                              ['Broadcast', 'Standard', 'BySize', 'ByShare'])
    if not transClient:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient
    self.serverURL = self.transClient.getServer()
    self.exists = False
    if transID:
      self.paramValues['TransformationID'] = transID
      res = self.getTransformation()
      if res['OK']:
        self.exists = True
      elif res['Message'] == 'Transformation does not exist':
        raise AttributeError('TransformationID %d does not exist' % transID)
      else:
        # Any other failure: fall back to an empty transformation
        self.paramValues['TransformationID'] = 0
        gLogger.fatal("Failed to get transformation from database",
                      "%s @ %s" % (transID, self.transClient.serverURL))

  def setServer(self, server):
    """ Remember the server URL and pass it to the transformation client """
    self.serverURL = server
    self.transClient.setServer(self.serverURL)

  def getServer(self):
    """ Return the server URL currently remembered """
    return self.serverURL

  def reset(self, transID=0):
    """ Re-run the constructor for transID (no transClient is passed on)

        :return: S_OK()
    """
    self.__init__(transID)
    self.transClient.setServer(self.serverURL)
    return S_OK()

  def setTargetSE(self, seList):
    """ Set the 'TargetSE' parameter, see __setSE for the accepted values """
    return self.__setSE('TargetSE', seList)

  def setSourceSE(self, seList):
    """ Set the 'SourceSE' parameter, see __setSE for the accepted values """
    return self.__setSE('SourceSE', seList)

  def __setSE(self, seParam, seList):
    """ Normalise seList to a list, check it and store it as parameter seParam

        :param seParam: name of the parameter to set
        :param seList: a list, tuple or dict (converted with list()), or a
                       string: either a python expression or comma-separated
        :return: S_OK(), or S_ERROR for any other type or a failed SE check
    """
    if isinstance(seList, basestring):
      try:
        # NOTE(review): eval() runs whatever expression the string contains;
        # only safe as long as seList never comes from an untrusted source.
        seList = eval(seList)
      except Exception:
        # Not a python expression: treat it as a comma-separated list
        seList = seList.split(',')
    elif isinstance(seList, (list, dict, tuple)):
      seList = list(seList)
    else:
      return S_ERROR("Bad argument type")
    res = self.__checkSEs(seList)
    if not res['OK']:
      return res
    # __setParam reads the name of the parameter from self.item_called
    self.item_called = seParam
    return self.__setParam(seList)

  def __getattr__(self, name):
    """ Resolve get<Param> / set<Param> into the generic getter / setter

        Only called for attributes not found the normal way. The parameter
        name is kept in self.item_called for the returned method to use.

        :raises AttributeError: for names not starting with 'get' or 'set'
    """
    if name.startswith('get'):
      self.item_called = name[3:]
      return self.__getParam
    if name.startswith('set'):
      self.item_called = name[3:]
      return self.__setParam
    raise AttributeError(name)

  def __getParam(self):
    """ Generic getter for the parameter named in self.item_called

        'Available' gives the names of the typed parameters, 'Parameters' the
        whole dictionary of values.

        :raises AttributeError: when the parameter is not known
    """
    if self.item_called == 'Available':
      return S_OK(list(self.paramTypes))
    if self.item_called == 'Parameters':
      return S_OK(self.paramValues)
    if self.item_called in self.paramValues:
      return S_OK(self.paramValues[self.item_called])
    raise AttributeError("Unknown parameter for transformation: %s" % self.item_called)

  def __setParam(self, value):
    """ Generic setter for the parameter named in self.item_called

        The value is stored locally and, when the transformation exists, also
        sent to the service first; nothing is done when the value is unchanged.

        :return: S_OK(), or the failed result of the service call
        :raises TypeError: when a typed parameter gets a value of another type
    """
    change = False
    if self.item_called in self.paramTypes:
      if self.paramValues[self.item_called] != value:
        if type(value) in self.paramTypes[self.item_called]:
          change = True
        else:
          raise TypeError("%s %s %s expected one of %s" % (self.item_called, value, type(value),
                                                           self.paramTypes[self.item_called]))
    else:
      # Untyped (additional) parameter: new, or known with a different value
      if self.item_called not in self.paramValues:
        change = True
      else:
        if self.paramValues[self.item_called] != value:
          change = True
    if not change:
      gLogger.verbose("No change of parameter %s required" % self.item_called)
    else:
      gLogger.verbose("Parameter %s to be changed" % self.item_called)
      transID = self.paramValues['TransformationID']
      if self.exists and transID:
        res = self.transClient.setTransformationParameter(transID, self.item_called, value)
        if not res['OK']:
          return res
      self.paramValues[self.item_called] = value
    return S_OK()

  def getTransformation(self, printOutput=False):
    """ Load the parameters of the transformation from the service

        Every parameter returned is applied through its set<Param> method.

        :return: S_OK( parameters dictionary ), or S_ERROR
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal("No TransformationID known")
      return S_ERROR()
    res = self.transClient.getTransformation(transID, extraParams=True)
    if not res['OK']:
      if printOutput:
        self._prettyPrint(res)
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setterName = "set%s" % paramName
      setter = getattr(self, setterName, None)
      # Test the resolved setter, not its name (which is never empty)
      if not callable(setter):
        gLogger.error("Unable to invoke setter %s, it isn't a member function" % setterName)
        continue
      setter(paramValue)
    if printOutput:
      gLogger.info("No printing available yet")
    return S_OK(transParams)

  def getTransformationLogging(self, printOutput=False):
    """ Get the logging records of the transformation

        :return: S_OK( list of records ), or S_ERROR
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal("No TransformationID known")
      return S_ERROR()
    res = self.transClient.getTransformationLogging(transID)
    if not res['OK']:
      if printOutput:
        self._prettyPrint(res)
      return res
    loggingList = res['Value']
    if printOutput:
      self._printFormattedDictList(loggingList, ['Message', 'MessageDate', 'AuthorDN'],
                                   'MessageDate', 'MessageDate')
    return S_OK(loggingList)

  def extendTransformation(self, nTasks, printOutput=False):
    """ Forward extendTransformation( transID, nTasks ) to the client """
    return self.__executeOperation('extendTransformation', nTasks, printOutput=printOutput)

  def cleanTransformation(self, printOutput=False):
    """ Forward cleanTransformation( transID ); on success the local status becomes 'Cleaned' """
    res = self.__executeOperation('cleanTransformation', printOutput=printOutput)
    if res['OK']:
      self.paramValues['Status'] = 'Cleaned'
    return res

  def deleteTransformation(self, printOutput=False):
    """ Forward deleteTransformation( transID ); on success this object is reset """
    res = self.__executeOperation('deleteTransformation', printOutput=printOutput)
    if res['OK']:
      self.reset()
    return res

  def addFilesToTransformation(self, lfns, printOutput=False):
    """ Forward addFilesToTransformation( transID, lfns ) to the client """
    return self.__executeOperation('addFilesToTransformation', lfns, printOutput=printOutput)

  def setFileStatusForTransformation(self, status, lfns, printOutput=False):
    """ Forward setFileStatusForTransformation( transID, status, lfns ) to the client """
    return self.__executeOperation('setFileStatusForTransformation', status, lfns, printOutput=printOutput)

  def getTransformationTaskStats(self, printOutput=False):
    """ Forward getTransformationTaskStats( transID ) to the client """
    return self.__executeOperation('getTransformationTaskStats', printOutput=printOutput)

  def getTransformationStats(self, printOutput=False):
    """ Forward getTransformationStats( transID ) to the client """
    return self.__executeOperation('getTransformationStats', printOutput=printOutput)

  def deleteTasks(self, taskMin, taskMax, printOutput=False):
    """ Forward deleteTasks( transID, taskMin, taskMax ) to the client """
    return self.__executeOperation('deleteTasks', taskMin, taskMax, printOutput=printOutput)

  def addTaskForTransformation(self, lfns=[], se='Unknown', printOutput=False):
    """ Forward addTaskForTransformation( transID, lfns, se ) to the client """
    return self.__executeOperation('addTaskForTransformation', lfns, se, printOutput=printOutput)

  def setTaskStatus(self, taskID, status, printOutput=False):
    """ Forward setTaskStatus( transID, taskID, status ) to the client """
    return self.__executeOperation('setTaskStatus', taskID, status, printOutput=printOutput)

  def __executeOperation(self, operation, *parms, **kwds):
    """ Call the method `operation` of the client for this transformation

        :param operation: name of the TransformationClient method
        :param parms: positional arguments passed after the transformation ID
        :param kwds: keyword arguments passed on; 'printOutput' is consumed
                     here (False when not given)
        :return: the result of the call, or S_ERROR when there is no
                 transformation ID or no such callable on the client
    """
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal("No TransformationID known")
      return S_ERROR()
    printOutput = kwds.pop('printOutput', False)
    fcn = getattr(self.transClient, operation, None)
    if not callable(fcn):
      return S_ERROR("Unable to invoke %s, it isn't a member funtion of TransformationClient" % operation)
    res = fcn(transID, *parms, **kwds)
    if printOutput:
      self._prettyPrint(res)
    return res

  def __printSelection(self, res, outputFields, uniqueKey, orderBy):
    """ Print a successful selection result (shared tail of the get...s methods)

        Without outputFields the field names found under 'ParameterNames' in
        res (if any) are listed instead of the records.
    """
    if not outputFields:
      fieldNames = ' '.join(str(fieldName) for fieldName in res.get('ParameterNames', []))
      gLogger.info("Available fields are: %s" % fieldNames)
    elif not res['Value']:
      gLogger.info("No tasks found for selection")
    else:
      self._printFormattedDictList(res['Value'], outputFields, uniqueKey, orderBy)

  def getTransformationFiles(self, fileStatus=[], lfns=[],
                             outputFields=['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE',
                                           'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate'],
                             orderBy='FileID', printOutput=False):
    """ Get the files of this transformation, optionally filtered on status and LFN

        :return: the result of TransformationClient.getTransformationFiles
    """
    condDict = {'TransformationID': self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles(condDict=condDict)
    if not res['OK']:
      if printOutput:
        self._prettyPrint(res)
      return res
    if printOutput:
      self.__printSelection(res, outputFields, 'FileID', orderBy)
    return res

  def getTransformationTasks(self, taskStatus=[], taskIDs=[],
                             outputFields=['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID',
                                           'TargetSE', 'CreationTime', 'LastUpdateTime'],
                             orderBy='TaskID', printOutput=False):
    """ Get the tasks of this transformation, optionally filtered on external status and task ID

        :return: the result of TransformationClient.getTransformationTasks
    """
    condDict = {'TransformationID': self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks(condDict=condDict)
    if not res['OK']:
      if printOutput:
        self._prettyPrint(res)
      return res
    if printOutput:
      self.__printSelection(res, outputFields, 'TaskID', orderBy)
    return res

  #############################################################################
  def getTransformations(self, transID=[], transStatus=[],
                         outputFields=['TransformationID', 'Status', 'AgentType',
                                       'TransformationName', 'CreationDate'],
                         orderBy='TransformationID', printOutput=False):
    """ Get transformations, optionally filtered on ID and status

        :return: the result of TransformationClient.getTransformations
    """
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations(condDict=condDict)
    if not res['OK']:
      if printOutput:
        self._prettyPrint(res)
      return res
    if printOutput:
      self.__printSelection(res, outputFields, 'TransformationID', orderBy)
    return res

  #############################################################################
  def getAuthorDNfromProxy(self):
    """ gets the AuthorDN and username of the transformation from the uploaded proxy

        :return: S_OK( {'username': ..., 'authorDN': ...} ), or S_ERROR
    """
    res = getProxyInfo()
    if not res['OK']:
      gLogger.error("Unable to get uploaded proxy Info %s " % res['Message'])
      return S_ERROR(res['Message'])
    author = res['Value']['identity']
    username = res['Value']['username']
    return S_OK({'username': username, 'authorDN': author})

  #############################################################################
  def getTransformationsByUser(self, authorDN="", userName="", transID=[], transStatus=[],
                               outputFields=['TransformationID', 'Status', 'AgentType',
                                             'TransformationName', 'CreationDate', 'AuthorDN'],
                               orderBy='TransformationID', printOutput=False):
    """ Get the transformations of one author, optionally filtered on ID and status

        Without authorDN the author is taken from the proxy; in that case a
        userName different from the one of the proxy is an error.

        :return: the result of TransformationClient.getTransformations, or S_ERROR
    """
    condDict = {}
    if authorDN == "":
      res = self.getAuthorDNfromProxy()
      if not res['OK']:
        gLogger.error(res['Message'])
        return S_ERROR(res['Message'])
      foundUserName = res['Value']['username']
      foundAuthor = res['Value']['authorDN']
      # If the username whom created the uploaded proxy is different than the provided username
      # report error and exit
      if not (userName == "" or userName == foundUserName):
        mismatch = "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % (
            userName, foundUserName)
        gLogger.error(mismatch)
        return S_ERROR(mismatch)
      userName = foundUserName
      authorDN = foundAuthor
      gLogger.info("Will list transformations created by user '%s' with status '%s'" %
                   (userName, ', '.join(transStatus)))
    else:
      gLogger.info("Will list transformations created by '%s' with status '%s'" %
                   (authorDN, ', '.join(transStatus)))
    condDict['AuthorDN'] = authorDN
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations(condDict=condDict)
    if not res['OK']:
      if printOutput:
        self._prettyPrint(res)
      return res
    if printOutput:
      self.__printSelection(res, outputFields, 'TransformationID', orderBy)
    return res

  #############################################################################
  def getSummaryTransformations(self, transID=[]):
    """Show the summary for a list of Transformations

       Fields starting with 'F' ('J') refers to files (jobs).
       Proc. stand for processed.

       :return: S_OK( list of dictionaries keyed by the short field names ),
                or S_ERROR when the query fails or nothing could be shown
    """
    condDict = {'TransformationID': transID}
    orderby = []
    start = 0
    maxitems = len(transID)
    paramShowNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed',
                      'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting',
                      'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled']
    # Below, the header used for each field in the printing: short to fit in one line
    paramShowNamesShort = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.',
                           'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled']
    dictList = []

    result = self.transClient.getTransformationSummaryWeb(condDict, orderby, start, maxitems)
    if not result['OK']:
      self._prettyPrint(result)
      return result

    if result['Value']['TotalRecords'] > 0:
      try:
        paramNames = result['Value']['ParameterNames']
        for paramValues in result['Value']['Records']:
          # Pick, in display order, the value of each field to show
          paramShowValues = [paramValues[paramNames.index(pname)] for pname in paramShowNames]
          dictList.append(dict(zip(paramShowNamesShort, paramShowValues)))
      except Exception as x:
        # Best effort: whatever was collected before the failure is still shown
        print('Exception %s ' % str(x))

    if not dictList:
      gLogger.error('No found transformations satisfying input condition')
      return S_ERROR('No found transformations satisfying input condition')

    print(self._printFormattedDictList(dictList, paramShowNamesShort,
                                       paramShowNamesShort[0], paramShowNamesShort[0]))
    return S_OK(dictList)
class Transformation(API):
    """Client-side handle on a single transformation.

    Parameter values are kept locally in ``paramValues``. ``getXxx()`` and
    ``setXxx(value)`` accessors for any parameter name are synthesised by
    ``__getattr__``. Once the transformation exists on the server
    (``self.exists`` is True and a TransformationID is known), every parameter
    change is also forwarded through the TransformationClient.
    """

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """c'tor

        :param transID: ID of an existing transformation to load, 0 for a new one
        :param transClient: client to use; a TransformationClient() is created if not given
        :raises AttributeError: if the server answers 'Transformation does not exist'
        """
        super(Transformation, self).__init__()

        # Accepted python types per known parameter (checked in __setParam).
        self.paramTypes = {
            'TransformationID': [types.IntType, types.LongType],
            'TransformationName': types.StringTypes,
            'Status': types.StringTypes,
            'Description': types.StringTypes,
            'LongDescription': types.StringTypes,
            'Type': types.StringTypes,
            'Plugin': types.StringTypes,
            'AgentType': types.StringTypes,
            'FileMask': types.StringTypes,
            'TransformationGroup': types.StringTypes,
            'GroupSize': [types.IntType, types.LongType, types.FloatType],
            'InheritedFrom': [types.IntType, types.LongType],
            'Body': types.StringTypes,
            'MaxNumberOfTasks': [types.IntType, types.LongType],
            'EventsPerTask': [types.IntType, types.LongType],
        }
        # Default values; additional (untyped) parameters may be added later.
        self.paramValues = {
            'TransformationID': 0,
            'TransformationName': '',
            'Status': 'New',
            'Description': '',
            'LongDescription': '',
            'Type': '',
            'Plugin': 'Standard',
            'AgentType': 'Manual',
            'FileMask': '',
            'TransformationGroup': 'General',
            'GroupSize': 1,
            'InheritedFrom': 0,
            'Body': '',
            'MaxNumberOfTasks': 0,
            'EventsPerTask': 0,
        }

        self.supportedPlugins = ['Broadcast', 'Standard', 'BySize', 'ByShare']
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues['TransformationID'] = transID
            res = self.getTransformation()
            if res['OK']:
                self.exists = True
            elif res['Message'] == 'Transformation does not exist':
                raise AttributeError('TransformationID %d does not exist' % transID)
            else:
                self.paramValues['TransformationID'] = 0
                gLogger.fatal("Failed to get transformation from database",
                              "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        """Remember the server URL and pass it on to the transformation client."""
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        """Return the server URL currently stored on this object."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-run the constructor for transID and return S_OK().

        NOTE(review): __init__ is called without a transClient, so a new
        TransformationClient is created and serverURL is re-read from it
        before being set again - confirm a custom server is not meant to survive.
        """
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Validate and store the 'TargetSE' parameter."""
        return self.__setSE('TargetSE', seList)

    def setSourceSE(self, seList):
        """Validate and store the 'SourceSE' parameter."""
        return self.__setSE('SourceSE', seList)

    def __setSE(self, se, seList):
        """Check seList against the known storage elements and store it as parameter se.

        A string argument is first evaluated as a python expression; if that
        fails it is split on commas/whitespace.
        """
        if type(seList) in types.StringTypes:
            # NOTE(review): eval() on a caller-supplied string executes arbitrary
            # code; consider ast.literal_eval if only literals are expected.
            try:
                seList = eval(seList)
            except Exception:
                seList = seList.replace(',', ' ').split()
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        self.item_called = se
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Synthesise getXxx / setXxx accessors for unknown attribute names.

        The parameter name (name without its 3-letter prefix) is stored in
        ``item_called`` and the generic getter / setter is returned.
        """
        if name.find('get') == 0:
            self.item_called = name[3:]
            return self.__getParam
        if name.find('set') == 0:
            self.item_called = name[3:]
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Return S_OK(value) for the parameter named by ``item_called``.

        'Available' yields the typed parameter names, 'Parameters' the whole
        value dictionary.

        :raises AttributeError: for an unknown parameter
        """
        if self.item_called == 'Available':
            return S_OK(list(self.paramTypes))
        if self.item_called == 'Parameters':
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" % self.item_called)

    def __setParam(self, value):
        """Store value for the parameter named by ``item_called``.

        Typed parameters are type-checked when the value differs from the
        current one. If the transformation exists on the server the change is
        sent there first; a failed server call is returned unchanged.

        :raises TypeError: if a typed parameter receives a value of another type
        """
        change = False
        if self.item_called in self.paramTypes:
            if self.paramValues[self.item_called] != value:
                if type(value) in self.paramTypes[self.item_called]:
                    change = True
                else:
                    raise TypeError("%s %s %s expected one of %s" % (self.item_called, value, type(value),
                                                                     self.paramTypes[self.item_called]))
        else:
            # Additional parameter: new, or changed with respect to the stored value.
            if self.item_called not in self.paramValues:
                change = True
            elif self.paramValues[self.item_called] != value:
                change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" % self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues['TransformationID']
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(transID, self.item_called, value)
                if not res['OK']:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Fetch the transformation from the server and load its parameters locally.

        :returns: S_OK(parameter dict) or the failed result
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setter = None
            setterName = "set%s" % paramName
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                # This lookup also primes item_called for synthesised setters.
                setter = getattr(self, setterName)
            if not setter:
                gLogger.error("Unable to invoke setter %s, it isn't a member function" % setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Return S_OK(list of logging entries) for this transformation."""
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        res = self.transClient.getTransformationLogging(transID)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res['Value']
        if printOutput:
            self._printFormattedDictList(loggingList, ['Message', 'MessageDate', 'AuthorDN'],
                                         'MessageDate', 'MessageDate')
        return S_OK(loggingList)

    def extendTransformation(self, nTasks, printOutput=False):
        """Forward 'extendTransformation' to the client for this transformation."""
        return self.__executeOperation('extendTransformation', nTasks, printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        """Forward 'cleanTransformation'; on success mark the local Status 'Cleaned'."""
        res = self.__executeOperation('cleanTransformation', printOutput=printOutput)
        if res['OK']:
            self.paramValues['Status'] = 'Cleaned'
        return res

    def deleteTransformation(self, printOutput=False):
        """Forward 'deleteTransformation'; on success reset this object."""
        res = self.__executeOperation('deleteTransformation', printOutput=printOutput)
        if res['OK']:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Forward 'addFilesToTransformation' with the given LFNs."""
        return self.__executeOperation('addFilesToTransformation', lfns, printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Forward 'setFileStatusForTransformation' with status and LFNs."""
        return self.__executeOperation('setFileStatusForTransformation', status, lfns, printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        """Forward 'getTransformationTaskStats'."""
        return self.__executeOperation('getTransformationTaskStats', printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        """Forward 'getTransformationStats'."""
        return self.__executeOperation('getTransformationStats', printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Forward 'deleteTasks' for the task range taskMin..taskMax."""
        return self.__executeOperation('deleteTasks', taskMin, taskMax, printOutput=printOutput)

    def addTaskForTransformation(self, lfns=[], se='Unknown', printOutput=False):
        """Forward 'addTaskForTransformation' with LFNs and storage element."""
        return self.__executeOperation('addTaskForTransformation', lfns, se, printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Forward 'setTaskStatus' for taskID."""
        return self.__executeOperation('setTaskStatus', taskID, status, printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Call transClient.<operation>(transID, *parms, **kwds) and return its result.

        The 'printOutput' keyword is consumed here (default False) and, when
        true, the result is pretty-printed.
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        printOutput = kwds.pop('printOutput', False)
        fcn = None
        if hasattr(self.transClient, operation) and callable(getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR("Unable to invoke %s, it isn't a member function of TransformationClient" % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self, fileStatus=[], lfns=[],
                               outputFields=['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE',
                                             'ErrorCount', 'InsertedTime', 'LastUpdate'],
                               orderBy='FileID', printOutput=False):
        """Query the files of this transformation, optionally filtered by status and LFN."""
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields, 'FileID', orderBy)
        return res

    def getTransformationTasks(self, taskStatus=[], taskIDs=[],
                               outputFields=['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID',
                                             'TargetSE', 'CreationTime', 'LastUpdateTime'],
                               orderBy='TaskID', printOutput=False):
        """Query the tasks of this transformation, optionally filtered by status and task ID."""
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields, 'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self, transID=[], transStatus=[],
                           outputFields=['TransformationID', 'Status', 'AgentType', 'TransformationName',
                                         'CreationDate'],
                           orderBy='TransformationID', printOutput=False):
        """Query transformations, optionally filtered by ID and status."""
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % ' '.join(res['ParameterNames']))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields, 'TransformationID', orderBy)
        return res

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create the transformation on the server from the current parameters.

        After creation, every parameter that is not one of the typed ones is
        sent separately; failures there are only logged.

        :returns: S_OK(transID) or the failed result
        """
        res = self._checkCreation()
        if not res['OK']:
            return self._errorReport(res, 'Failed transformation sanity check')
        if printOutput:
            gLogger.info("Will attempt to create transformation with the following parameters")
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(self.paramValues['TransformationName'],
                                                 self.paramValues['Description'],
                                                 self.paramValues['LongDescription'],
                                                 self.paramValues['Type'],
                                                 self.paramValues['Plugin'],
                                                 self.paramValues['AgentType'],
                                                 self.paramValues['FileMask'],
                                                 transformationGroup=self.paramValues['TransformationGroup'],
                                                 groupSize=self.paramValues['GroupSize'],
                                                 inheritedFrom=self.paramValues['InheritedFrom'],
                                                 body=self.paramValues['Body'],
                                                 maxTasks=self.paramValues['MaxNumberOfTasks'],
                                                 eventsPerTask=self.paramValues['EventsPerTask'],
                                                 addFiles=addFiles)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res['Value']
        self.exists = True
        self.setTransformationID(transID)
        gLogger.info("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            if paramName not in self.paramTypes:
                res = self.transClient.setTransformationParameter(transID, paramName, paramValue)
                if not res['OK']:
                    gLogger.error("Failed to add parameter", "%s %s" % (paramName, res['Message']))
                    gLogger.info("To add this parameter later please execute the following.")
                    gLogger.info("oTransformation = Transformation(%d)" % transID)
                    gLogger.info("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Sanity checks before creation: no ID yet, required parameters set, plugin known.

        Missing values are asked for interactively; the plugin-specific
        ``_check<Plugin>Plugin`` method is run last and its result returned.
        """
        if self.paramValues['TransformationID']:
            gLogger.info("You are currently working with an active transformation definition.")
            gLogger.info("If you wish to create a new transformation reset the TransformationID.")
            gLogger.info("oTransformation.reset()")
            return S_ERROR("TransformationID is already set")

        requiredParameters = ['TransformationName', 'Description', 'LongDescription', 'Type']
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info("%s is not defined for this transformation. This is required..." % parameter)
                res = self.__promptForParameter(parameter)
                if not res['OK']:
                    return res

        plugin = self.paramValues['Plugin']
        if plugin not in self.supportedPlugins:
            gLogger.info("The selected Plugin (%s) is not known to the transformation agent." % plugin)
            res = self.__promptForParameter('Plugin', choices=self.supportedPlugins, default='Standard')
            if not res['OK']:
                return res
            plugin = self.paramValues['Plugin']

        checkPlugin = "_check%sPlugin" % plugin
        fcn = None
        if hasattr(self, checkPlugin) and callable(getattr(self, checkPlugin)):
            fcn = getattr(self, checkPlugin)
        if not fcn:
            return S_ERROR("Unable to invoke %s, it isn't a member function" % checkPlugin)
        return fcn()

    def _checkBySizePlugin(self):
        """Same requirements as the Standard plugin."""
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        """Same requirements as the Standard plugin."""
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        """Force GroupSize to 1 when it is zero or negative."""
        groupSize = self.paramValues['GroupSize']
        if groupSize <= 0:
            gLogger.info("The GroupSize was found to be less than zero. It has been set to 1.")
            res = self.setGroupSize(1)
            if not res['OK']:
                return res
        return S_OK()

    def _checkBroadcastPlugin(self):
        """Make sure SourceSE and TargetSE are set, prompting for any that is missing."""
        requiredParams = ['SourceSE', 'TargetSE']
        gLogger.info("The Broadcast plugin requires the following parameters be set: %s"
                     % ', '.join(requiredParams))
        for requiredParam in requiredParams:
            if not self.paramValues.get(requiredParam):
                res = self.__promptForParameter(requiredParam, insert=False)
                if not res['OK']:
                    return res
                paramValue = res['Value']
                setter = None
                setterName = "set%s" % requiredParam
                if hasattr(self, setterName) and callable(getattr(self, setterName)):
                    setter = getattr(self, setterName)
                if not setter:
                    return S_ERROR("Unable to invoke %s, this function hasn't been implemented." % setterName)
                ses = paramValue.replace(',', ' ').split()
                res = setter(ses)
                if not res['OK']:
                    return res
        return S_OK()

    def __checkSEs(self, seList):
        """Return S_OK() if every entry of seList is a section of /Resources/StorageElements."""
        res = gConfig.getSections('/Resources/StorageElements')
        if not res['OK']:
            return self._errorReport(res, 'Failed to get possible StorageElements')
        missing = []
        for se in seList:
            if se not in res['Value']:
                gLogger.error("StorageElement %s is not known" % se)
                missing.append(se)
        if missing:
            return S_ERROR("%d StorageElements not known" % len(missing))
        return S_OK()

    def __promptForParameter(self, parameter, choices=[], default='', insert=True):
        """Ask the user for a parameter value and, if insert is true, store it via its setter.

        :returns: S_OK(entered value) or the failed result
        """
        res = promptUser("Please enter %s" % parameter, choices=choices, default=default)
        if not res['OK']:
            return self._errorReport(res)
        gLogger.info("%s will be set to '%s'" % (parameter, res['Value']))
        paramValue = res['Value']
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                return S_ERROR("Unable to invoke %s, it isn't a member function of Transformation!" % setterName)
            res = setter(paramValue)
            if not res['OK']:
                return res
        return S_OK(paramValue)
class Transformation(API):
    """Client-side handle on a single transformation.

    Parameter values are kept locally in ``paramValues``. ``getXxx()`` and
    ``setXxx(value)`` accessors for any parameter name are synthesised by
    ``__getattr__``. Once the transformation exists on the server
    (``self.exists`` is True and a TransformationID is known), every parameter
    change is also forwarded through the TransformationClient.
    """

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """c'tor

        :param transID: ID of an existing transformation to load, 0 for a new one
        :param transClient: client to use; a TransformationClient() is created if not given
        :raises AttributeError: if the server answers 'Transformation does not exist'
        """
        super(Transformation, self).__init__()

        # Accepted python types per known parameter (checked in __setParam).
        self.paramTypes = {
            "TransformationID": six.integer_types,
            "TransformationName": six.string_types,
            "Status": six.string_types,
            "Description": six.string_types,
            "LongDescription": six.string_types,
            "Type": six.string_types,
            "Plugin": six.string_types,
            "AgentType": six.string_types,
            "FileMask": six.string_types,
            "TransformationGroup": six.string_types,
            "GroupSize": six.integer_types + (float,),
            "InheritedFrom": six.integer_types,
            "Body": six.string_types,
            "MaxNumberOfTasks": six.integer_types,
            "EventsPerTask": six.integer_types,
        }
        # Default values; additional (untyped) parameters may be added later.
        self.paramValues = {
            "TransformationID": 0,
            "TransformationName": "",
            "Status": "New",
            "Description": "",
            "LongDescription": "",
            "Type": "",
            "Plugin": "Standard",
            "AgentType": "Manual",
            "FileMask": "",
            "TransformationGroup": "General",
            "GroupSize": 1,
            "InheritedFrom": 0,
            "Body": "",
            "MaxNumberOfTasks": 0,
            "EventsPerTask": 0,
        }

        # the metaquery parameters are neither part of the transformation parameters nor the additional parameters, so
        # special treatment is necessary
        self.inputMetaQuery = None
        self.outputMetaQuery = None

        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            "Transformations/AllowedPlugins", ["Broadcast", "Standard", "BySize", "ByShare"]
        )
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues["TransformationID"] = transID
            res = self.getTransformation()
            if res["OK"]:
                self.exists = True
            elif res["Message"] == "Transformation does not exist":
                raise AttributeError("TransformationID %d does not exist" % transID)
            else:
                self.paramValues["TransformationID"] = 0
                gLogger.fatal(
                    "Failed to get transformation from database", "%s @ %s" % (transID, self.transClient.serverURL)
                )

    def getServer(self):
        """Return the server URL currently stored on this object."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-run the constructor for transID and return S_OK().

        NOTE(review): __init__ is called without a transClient, so a new
        TransformationClient is created and serverURL is re-read from it
        before being set again - confirm a custom server is not meant to survive.
        """
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Validate and store the 'TargetSE' parameter."""
        return self.__setSE("TargetSE", seList)

    def setSourceSE(self, seList):
        """Validate and store the 'SourceSE' parameter."""
        return self.__setSE("SourceSE", seList)

    def setBody(self, body):
        """check that the body is a string, or using the proper syntax for multiple operations,
        or is a BodyPlugin object

        :param body: transformation body, for example

          .. code :: python

            body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                     ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
                   ]

        :type body: string or list of tuples (or lists) of string and dictionaries or a Body plugin
                    (:py:class:`DIRAC.TransformationSystem.Client.BodyPlugin.BaseBody.BaseBody`)
        :raises TypeError: If the structure is not as expected
        :raises ValueError: If unknown attribute for the
                            :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation` is used
        :returns: S_OK, S_ERROR
        """
        self.item_called = "Body"

        # Simple single operation body case
        if isinstance(body, six.string_types):
            return self.__setParam(body)

        # BodyPlugin case
        if isinstance(body, BaseBody):
            return self.__setParam(encode(body))

        if not isinstance(body, (list, tuple)):
            raise TypeError("Expected list or string, but %r is %s" % (body, type(body)))

        # MultiOperation body case
        for tup in body:
            if not isinstance(tup, (tuple, list)):
                raise TypeError("Expected tuple or list, but %r is %s" % (tup, type(tup)))
            if len(tup) != 2:
                raise TypeError("Expected 2-tuple, but %r is length %d" % (tup, len(tup)))
            if not isinstance(tup[0], six.string_types):
                raise TypeError("Expected string, but first entry in tuple %r is %s" % (tup, type(tup[0])))
            if not isinstance(tup[1], dict):
                # Report the type of the offending (second) entry.
                raise TypeError("Expected dictionary, but second entry in tuple %r is %s" % (tup, type(tup[1])))
            for par, val in tup[1].items():
                if not isinstance(par, six.string_types):
                    raise TypeError("Expected string, but key in dictionary %r is %s" % (par, type(par)))
                if par not in Operation.ATTRIBUTE_NAMES:
                    raise ValueError("Unknown attribute for Operation: %s" % par)
                if not isinstance(val, six.string_types + six.integer_types + (float, list, tuple, dict)):
                    raise TypeError("Cannot encode %r, in json" % (val))
        return self.__setParam(json.dumps(body))

    def setInputMetaQuery(self, query):
        """Set the input meta query.

        :param dict query: dictionary to use for input meta query
        """
        self.inputMetaQuery = query
        return S_OK()

    def setOutputMetaQuery(self, query):
        """Set the output meta query.

        :param dict query: dictionary to use for output meta query
        """
        self.outputMetaQuery = query
        return S_OK()

    def __setSE(self, seParam, seList):
        """Check seList against the known storage elements and store it as parameter seParam.

        A string argument is first evaluated as a python expression; if that
        fails it is split on commas. Lists, dicts and tuples are converted to
        a list; anything else yields S_ERROR.
        """
        if isinstance(seList, six.string_types):
            # NOTE(review): eval() on a caller-supplied string executes arbitrary
            # code; consider ast.literal_eval if only literals are expected.
            try:
                seList = eval(seList)
            except Exception:
                seList = seList.split(",")
        elif isinstance(seList, (list, dict, tuple)):
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        res = self.__checkSEs(seList)
        if not res["OK"]:
            return res
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Synthesise getXxx / setXxx accessors for unknown attribute names.

        The parameter name (name without its 3-letter prefix) is stored in
        ``item_called`` and the generic getter / setter is returned.
        """
        if name.find("get") == 0:
            self.item_called = name[3:]
            return self.__getParam
        if name.find("set") == 0:
            self.item_called = name[3:]
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Return S_OK(value) for the parameter named by ``item_called``.

        'Available' yields the typed parameter names, 'Parameters' the whole
        value dictionary.

        :raises AttributeError: for an unknown parameter
        """
        if self.item_called == "Available":
            return S_OK(list(self.paramTypes))
        if self.item_called == "Parameters":
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" % self.item_called)

    def __setParam(self, value):
        """Store value for the parameter named by ``item_called``.

        Typed parameters are type-checked when the value differs from the
        current one. If the transformation exists on the server the change is
        sent there first; a failed server call is returned unchanged.

        :raises TypeError: if a typed parameter receives a value of another type
        """
        change = False
        if self.item_called in self.paramTypes:
            if self.paramValues[self.item_called] != value:
                if isinstance(value, self.paramTypes[self.item_called]):
                    change = True
                else:
                    raise TypeError(
                        "%s %s %s expected one of %s"
                        % (self.item_called, value, type(value), self.paramTypes[self.item_called])
                    )
        else:
            # Additional parameter: new, or changed with respect to the stored value.
            if self.item_called not in self.paramValues:
                change = True
            elif self.paramValues[self.item_called] != value:
                change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" % self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues["TransformationID"]
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(transID, self.item_called, value)
                if not res["OK"]:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Fetch the transformation from the server and load its parameters locally.

        :returns: S_OK(parameter dict) or the failed result
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res["Value"]
        for paramName, paramValue in transParams.items():
            setter = None
            setterName = "set%s" % paramName
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                # This lookup also primes item_called for synthesised setters.
                setter = getattr(self, setterName)
            if not setter:
                gLogger.error("Unable to invoke setter %s, it isn't a member function" % setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Return S_OK(list of logging entries) for this transformation."""
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        res = self.transClient.getTransformationLogging(transID)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res["Value"]
        if printOutput:
            self._printFormattedDictList(
                loggingList, ["Message", "MessageDate", "AuthorDN"], "MessageDate", "MessageDate"
            )
        return S_OK(loggingList)

    def extendTransformation(self, nTasks, printOutput=False):
        """Forward 'extendTransformation' to the client for this transformation."""
        return self.__executeOperation("extendTransformation", nTasks, printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        """Forward 'cleanTransformation'; on success mark the local Status 'Cleaned'."""
        res = self.__executeOperation("cleanTransformation", printOutput=printOutput)
        if res["OK"]:
            self.paramValues["Status"] = "Cleaned"
        return res

    def deleteTransformation(self, printOutput=False):
        """Forward 'deleteTransformation'; on success reset this object."""
        res = self.__executeOperation("deleteTransformation", printOutput=printOutput)
        if res["OK"]:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        """Forward 'addFilesToTransformation' with the given LFNs."""
        return self.__executeOperation("addFilesToTransformation", lfns, printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        """Forward 'setFileStatusForTransformation' with status and LFNs."""
        return self.__executeOperation("setFileStatusForTransformation", status, lfns, printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        """Forward 'getTransformationTaskStats'."""
        return self.__executeOperation("getTransformationTaskStats", printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        """Forward 'getTransformationStats'."""
        return self.__executeOperation("getTransformationStats", printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        """Forward 'deleteTasks' for the task range taskMin..taskMax."""
        return self.__executeOperation("deleteTasks", taskMin, taskMax, printOutput=printOutput)

    def addTaskForTransformation(self, lfns=[], se="Unknown", printOutput=False):
        """Forward 'addTaskForTransformation' with LFNs and storage element."""
        return self.__executeOperation("addTaskForTransformation", lfns, se, printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        """Forward 'setTaskStatus' for taskID."""
        return self.__executeOperation("setTaskStatus", taskID, status, printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Call transClient.<operation>(transID, *parms, **kwds) and return its result.

        The 'printOutput' keyword is consumed here (default False) and, when
        true, the result is pretty-printed.
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR("No TransformationID known")
        printOutput = kwds.pop("printOutput", False)
        fcn = None
        if hasattr(self.transClient, operation) and callable(getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR("Unable to invoke %s, it isn't a member function of TransformationClient" % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(
        self,
        fileStatus=[],
        lfns=[],
        outputFields=[
            "FileID",
            "LFN",
            "Status",
            "TaskID",
            "TargetSE",
            "UsedSE",
            "ErrorCount",
            "InsertedTime",
            "LastUpdate",
        ],
        orderBy="FileID",
        printOutput=False,
    ):
        """Query the files of this transformation, optionally filtered by status and LFN."""
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if fileStatus:
            condDict["Status"] = fileStatus
        if lfns:
            condDict["LFN"] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "FileID", orderBy)
        return res

    def getTransformationTasks(
        self,
        taskStatus=[],
        taskIDs=[],
        outputFields=[
            "TransformationID",
            "TaskID",
            "ExternalStatus",
            "ExternalID",
            "TargetSE",
            "CreationTime",
            "LastUpdateTime",
        ],
        orderBy="TaskID",
        printOutput=False,
    ):
        """Query the tasks of this transformation, optionally filtered by status and task ID."""
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if taskStatus:
            condDict["ExternalStatus"] = taskStatus
        if taskIDs:
            condDict["TaskID"] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "TaskID", orderBy)
        return res

    #############################################################################
    def getTransformations(
        self,
        transID=[],
        transStatus=[],
        outputFields=["TransformationID", "Status", "AgentType", "TransformationName", "CreationDate"],
        orderBy="TransformationID",
        printOutput=False,
    ):
        """Query transformations, optionally filtered by ID and status."""
        condDict = {}
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "TransformationID", orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """gets the AuthorDN and username of the transformation from the uploaded proxy

        :returns: S_OK({'username': ..., 'authorDN': ...}) or S_ERROR
        """
        res = getProxyInfo()
        if not res["OK"]:
            gLogger.error("Unable to get uploaded proxy Info %s " % res["Message"])
            return S_ERROR(res["Message"])
        author = res["Value"]["identity"]
        username = res["Value"]["username"]
        return S_OK({"username": username, "authorDN": author})

    #############################################################################
    def getTransformationsByUser(
        self,
        authorDN="",
        userName="",
        transID=[],
        transStatus=[],
        outputFields=["TransformationID", "Status", "AgentType", "TransformationName", "CreationDate", "AuthorDN"],
        orderBy="TransformationID",
        printOutput=False,
    ):
        """Query transformations created by a given author.

        When authorDN is empty it is taken from the proxy; a non-empty userName
        must then match the proxy's user name, otherwise S_ERROR is returned.
        """
        condDict = {}
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res["OK"]:
                gLogger.error(res["Message"])
                return S_ERROR(res["Message"])
            foundUserName = res["Value"]["username"]
            foundAuthor = res["Value"]["authorDN"]
            # If the username whom created the uploaded proxy is different than the provided username report error
            # and exit
            if not (userName == "" or userName == foundUserName):
                gLogger.error(
                    "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                    % (userName, foundUserName)
                )
                return S_ERROR(
                    "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                    % (userName, foundUserName)
                )

            userName = foundUserName
            authorDN = foundAuthor
            gLogger.info(
                "Will list transformations created by user '%s' with status '%s'"
                % (userName, ", ".join(transStatus))
            )
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" % (authorDN, ", ".join(transStatus))
            )

        condDict["AuthorDN"] = authorDN
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res

        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % " ".join(res["ParameterNames"]))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "TransformationID", orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Show the summary for a list of Transformations

        Fields starting with 'F' ('J') refers to files (jobs).
        Proc. stand for processed.

        :returns: S_OK(list of summary dicts keyed by the short names) or S_ERROR
        """
        condDict = {"TransformationID": transID}
        orderby = []
        start = 0
        maxitems = len(transID)
        paramShowNames = [
            "TransformationID",
            "Type",
            "Status",
            "Files_Total",
            "Files_PercentProcessed",
            "Files_Processed",
            "Files_Unused",
            "Jobs_TotalCreated",
            "Jobs_Waiting",
            "Jobs_Running",
            "Jobs_Done",
            "Jobs_Failed",
            "Jobs_Stalled",
        ]
        # Below, the header used for each field in the printing: short to fit in one line
        paramShowNamesShort = [
            "TransID",
            "Type",
            "Status",
            "F_Total",
            "F_Proc.(%)",
            "F_Proc.",
            "F_Unused",
            "J_Created",
            "J_Wait",
            "J_Run",
            "J_Done",
            "J_Fail",
            "J_Stalled",
        ]
        dictList = []

        result = self.transClient.getTransformationSummaryWeb(condDict, orderby, start, maxitems)
        if not result["OK"]:
            self._prettyPrint(result)
            return result

        if result["Value"]["TotalRecords"] > 0:
            try:
                paramNames = result["Value"]["ParameterNames"]
                for paramValues in result["Value"]["Records"]:
                    # Pick the displayed columns out of each record, in display order.
                    paramShowValues = [paramValues[paramNames.index(pname)] for pname in paramShowNames]
                    dictList.append(dict(zip(paramShowNamesShort, paramShowValues)))
            except Exception as x:
                print("Exception %s " % str(x))

        if not dictList:
            gLogger.error("No found transformations satisfying input condition")
            return S_ERROR("No found transformations satisfying input condition")

        print(
            self._printFormattedDictList(
                dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0]
            )
        )

        return S_OK(dictList)

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Add transformation to the transformation system.

        Sets all parameters currently assigned to the transformation.

        :param bool addFiles: if True, immediately perform input data query
        :param bool printOutput: if True, print information about transformation
        :returns: S_OK(transID) or the failed result
        """
        res = self._checkCreation()
        if not res["OK"]:
            return self._errorReport(res, "Failed transformation sanity check")
        if printOutput:
            gLogger.info("Will attempt to create transformation with the following parameters")
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues["TransformationName"],
            self.paramValues["Description"],
            self.paramValues["LongDescription"],
            self.paramValues["Type"],
            self.paramValues["Plugin"],
            self.paramValues["AgentType"],
            self.paramValues["FileMask"],
            transformationGroup=self.paramValues["TransformationGroup"],
            groupSize=self.paramValues["GroupSize"],
            inheritedFrom=self.paramValues["InheritedFrom"],
            body=self.paramValues["Body"],
            maxTasks=self.paramValues["MaxNumberOfTasks"],
            eventsPerTask=self.paramValues["EventsPerTask"],
            addFiles=addFiles,
            inputMetaQuery=self.inputMetaQuery,
            outputMetaQuery=self.outputMetaQuery,
        )
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res["Value"]
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        # Parameters without a declared type are sent one by one after creation.
        for paramName, paramValue in self.paramValues.items():
            if paramName not in self.paramTypes:
                res = self.transClient.setTransformationParameter(transID, paramName, paramValue)
                if not res["OK"]:
                    gLogger.error("Failed to add parameter", "%s %s" % (paramName, res["Message"]))
                    gLogger.notice("To add this parameter later please execute the following.")
                    gLogger.notice("oTransformation = Transformation(%d)" % transID)
                    gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Sanity checks before creation: no ID yet, required parameters set, plugin known.

        Missing required values are read from standard input; an unknown
        plugin is replaced by a prompted choice.
        """
        if self.paramValues["TransformationID"]:
            gLogger.info("You are currently working with an active transformation definition.")
            gLogger.info("If you wish to create a new transformation reset the TransformationID.")
            gLogger.info("oTransformation.reset()")
            return S_ERROR("TransformationID is already set")

        requiredParameters = ["TransformationName", "Description", "LongDescription", "Type"]
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info("%s is not defined for this transformation. This is required..." % parameter)
                self.paramValues[parameter] = six.moves.input("Please enter the value of " + parameter + " ")

        plugin = self.paramValues["Plugin"]
        if plugin and plugin not in self.supportedPlugins:
            gLogger.info("The selected Plugin (%s) is not known to the transformation agent." % plugin)
            res = self.__promptForParameter("Plugin", choices=self.supportedPlugins, default="Standard")
            if not res["OK"]:
                return res
            self.paramValues["Plugin"] = res["Value"]

        return S_OK()

    def _checkBySizePlugin(self):
        """Same requirements as the Standard plugin."""
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        """Same requirements as the Standard plugin."""
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        """Force GroupSize to 1 when it is zero or negative."""
        groupSize = self.paramValues["GroupSize"]
        if groupSize <= 0:
            gLogger.info("The GroupSize was found to be less than zero. It has been set to 1.")
            res = self.setGroupSize(1)
            if not res["OK"]:
                return res
        return S_OK()

    def _checkBroadcastPlugin(self):
        """Make sure SourceSE and TargetSE are set, reading any missing one from standard input."""
        requiredParams = ["SourceSE", "TargetSE"]
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s" % (", ".join(requiredParams))
        )
        for requiredParam in requiredParams:
            if not self.paramValues.get(requiredParam):
                paramValue = six.moves.input("Please enter " + requiredParam + " ")
                setter = None
                setterName = "set%s" % requiredParam
                if hasattr(self, setterName) and callable(getattr(self, setterName)):
                    setter = getattr(self, setterName)
                if not setter:
                    return S_ERROR("Unable to invoke %s, this function hasn't been implemented." % setterName)
                ses = paramValue.replace(",", " ").split()
                res = setter(ses)
                if not res["OK"]:
                    return res
        return S_OK()

    def __checkSEs(self, seList):
        """Return S_OK() if every entry of seList is a section of /Resources/StorageElements."""
        res = gConfig.getSections("/Resources/StorageElements")
        if not res["OK"]:
            return self._errorReport(res, "Failed to get possible StorageElements")
        missing = set(seList) - set(res["Value"])
        if missing:
            for se in missing:
                gLogger.error("StorageElement %s is not known" % se)
            return S_ERROR("%d StorageElements not known" % len(missing))
        return S_OK()

    def __promptForParameter(self, parameter, choices=[], default="", insert=True):
        """Ask the user for a parameter value and, if insert is true, store it via its setter.

        :returns: S_OK(entered value) or the failed result
        """
        res = promptUser("Please enter %s" % parameter, choices=choices, default=default)
        if not res["OK"]:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res["Value"]))
        paramValue = res["Value"]
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                return S_ERROR("Unable to invoke %s, it isn't a member function of Transformation!" % setterName)
            res = setter(paramValue)
            if not res["OK"]:
                return res
        return S_OK(paramValue)
class TransformationAgent(AgentModule):
    """Agent turning the 'Unused' input files of transformations into tasks.

    For every selected transformation the unused files and their replicas are
    handed to a TransformationPlugin, and each (SE, LFNs) group it returns is
    registered as a task through the transformation client.
    """

    def initialize(self):
        """Read the agent options and create the clients used by the agent."""
        self.pluginLocation = self.am_getOption(
            'PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.checkCatalog = self.am_getOption('CheckCatalog', 'yes')
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/ProductionManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'ProductionManager')
        self.transDB = TransformationClient('TransformationDB')
        self.rm = ReplicaManager()
        return S_OK()

    def execute(self):
        """Process every selected transformation once.

        Failures are logged per transformation; the cycle itself always returns S_OK().
        """
        # Get the transformations to process
        res = self.getTransformations()
        if not res['OK']:
            gLogger.info("%s.execute: Failed to obtain transformations: %s" % (AGENT_NAME, res['Message']))
            return S_OK()
        # Process the transformations
        for transDict in res['Value']:
            # int() instead of the Python-2-only long(); Python 2 promotes large values itself.
            transID = int(transDict['TransformationID'])
            gLogger.info("%s.execute: Processing transformation %s." % (AGENT_NAME, transID))
            startTime = time.time()
            res = self.processTransformation(transDict)
            if not res['OK']:
                gLogger.info("%s.execute: Failed to process transformation: %s" % (AGENT_NAME, res['Message']))
            else:
                gLogger.info("%s.execute: Processed transformation in %.1f seconds" %
                             (AGENT_NAME, time.time() - startTime))
        return S_OK()

    def getTransformations(self):
        """Obtain the transformations to be executed.

        With the 'Transformation' option left at 'All' every Active, Completing
        or Flush transformation is selected, otherwise only the named one.

        :return: S_OK(list of transformation dictionaries) or the failing result
        """
        transName = self.am_getOption('Transformation', 'All')
        if transName == 'All':
            gLogger.info("%s.getTransformations: Initializing general purpose agent." % AGENT_NAME)
            res = self.transDB.getTransformations(
                {'Status': ['Active', 'Completing', 'Flush']}, extraParams=True)
            if not res['OK']:
                gLogger.error("%s.getTransformations: Failed to get transformations." % AGENT_NAME, res['Message'])
                return res
            transformations = res['Value']
            gLogger.info("%s.getTransformations: Obtained %d transformations to process" %
                         (AGENT_NAME, len(transformations)))
        else:
            gLogger.info("%s.getTransformations: Initializing for transformation %s." % (AGENT_NAME, transName))
            res = self.transDB.getTransformation(transName, extraParams=True)
            if not res['OK']:
                gLogger.error("%s.getTransformations: Failed to get transformation." % AGENT_NAME, res['Message'])
                return res
            transformations = [res['Value']]
        return S_OK(transformations)

    def processTransformation(self, transDict):
        """Create tasks for the 'Unused' files of one transformation.

        :param dict transDict: transformation description; 'TransformationID',
                               'Status', 'Type' and optionally 'Plugin' are read
        :return: S_OK() or the first failing result
        """
        transID = transDict['TransformationID']
        # First get the LFNs associated to the transformation
        res = self.transDB.getTransformationFiles(condDict={'TransformationID': transID, 'Status': 'Unused'})
        if not res['OK']:
            gLogger.error("%s.processTransformation: Failed to obtain input data." % AGENT_NAME, res['Message'])
            return res
        transFiles = res['Value']
        lfns = res['LFNs']
        if not lfns:
            gLogger.info("%s.processTransformation: No 'Unused' files found for transformation." % AGENT_NAME)
            if transDict['Status'] == 'Flush':
                self._flushToActive(transID)
            return S_OK()

        # Check the data is available with replicas
        # NOTE(review): __getDataReplicas is not defined in the part of the class
        # visible here; confirm it exists before relying on this method.
        active = transDict['Type'].lower() not in ["replication", "removal"]
        res = self.__getDataReplicas(transID, lfns, active=active)
        if not res['OK']:
            gLogger.error("%s.processTransformation: Failed to get data replicas" % AGENT_NAME, res['Message'])
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        plugin = transDict.get('Plugin') or 'Standard'
        gLogger.info("%s.processTransformation: Processing transformation with '%s' plug-in." %
                     (AGENT_NAME, plugin))
        res = self.__generatePluginObject(plugin)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.generateTasks()
        if not res['OK']:
            gLogger.error("%s.processTransformation: Failed to generate tasks for transformation." % AGENT_NAME,
                          res['Message'])
            return res
        tasks = res['Value']

        # Create the tasks
        allCreated = True
        created = 0
        for se, taskLfns in tasks:
            res = self.transDB.addTaskForTransformation(transID, taskLfns, se)
            if not res['OK']:
                gLogger.error("%s.processTransformation: Failed to add task generated by plug-in." % AGENT_NAME,
                              res['Message'])
                allCreated = False
            else:
                created += 1
        if created:
            gLogger.info("%s.processTransformation: Successfully created %d tasks for transformation." %
                         (AGENT_NAME, created))

        # If this production is to Flush
        if transDict['Status'] == 'Flush' and allCreated:
            self._flushToActive(transID)
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def _flushToActive(self, transID):
        """Put a flushed transformation back to 'Active', logging the outcome."""
        res = self.transDB.setTransformationParameter(transID, 'Status', 'Active')
        if not res['OK']:
            gLogger.error("%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME,
                          res['Message'])
        else:
            gLogger.info("%s.execute: Updated transformation status to 'Active'." % AGENT_NAME)

    def __generatePluginObject(self, plugin):
        """ This simply instantiates the TransformationPlugin class with the relevant plugin name

        :param str plugin: plug-in name handed to the TransformationPlugin constructor
        :return: S_OK(plug-in object) or S_ERROR
        """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(), ['TransformationPlugin'])
        except Exception as x:
            gLogger.exception("%s.__generatePluginObject: Failed to import 'TransformationPlugin'" % AGENT_NAME,
                              '', x)
            return S_ERROR("Failed to import 'TransformationPlugin'")
        try:
            # Direct call instead of eval() on a string built from the plug-in
            # name: same object, no code execution through a crafted name.
            return S_OK(plugModule.TransformationPlugin(plugin))
        except Exception as x:
            gLogger.exception("%s.__generatePluginObject: Failed to create %s()." % (AGENT_NAME, plugin), '', x)
            return S_ERROR("Failed to create %s()" % plugin)
if switch[0] == 't' or switch[0] == 'status': status = switch[1].split(',') status = [s.strip() for s in status] tc = TransformationClient() for t in args: res = tc.getTransformation(t) if not res['OK']: gLogger.error('Failed to get transformation information for %s: %s' % (t, res['Message'])) continue selectDict = {'TransformationID': res['Value']['TransformationID']} if status: selectDict['Status'] = status res = tc.getTransformationFiles(condDict=selectDict) if not res['OK']: gLogger.error('Failed to get transformation files: %s' % res['Message']) continue if not res['Value']: gLogger.debug('No file found for transformation %s' % t) continue lfns = [f['LFN'] for f in res['Value']] gLogger.notice('Reset files for status: %s' % status) res = tc.setFileStatusForTransformation(t, 'Unused', lfns) if not res['OK']: gLogger.error('Failed to reset file status: %s' % res['Message']) continue if 'Failed' in res['Value']:
"""
Get the files attached to a transformation
"""
__RCSID__ = "$Id$"

import DIRAC
from DIRAC.Core.Base import Script

# First non-blank docstring line: unlike __doc__.split('\n')[1] this does not
# depend on the docstring starting with a newline.
_summary = next((line.strip() for line in __doc__.splitlines() if line.strip()), '')
Script.setUsageMessage('\n'.join([_summary,
                                  'Usage:',
                                  ' %s TransID' % Script.scriptName]))

Script.parseCommandLine()

# Imported after the command line is parsed, as in the original script.
from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient

args = Script.getPositionalArgs()
if len(args) != 1:
    # Exactly one transformation ID is expected.
    Script.showHelp(exitCode=1)

tc = TransformationClient()
res = tc.getTransformationFiles({'TransformationID': args[0]})

if not res['OK']:
    DIRAC.gLogger.error(res['Message'])
    DIRAC.exit(2)

# One LFN per line.
for transfile in res['Value']:
    DIRAC.gLogger.notice(transfile['LFN'])
class TransformationCLI(cmd.Cmd, API):
    """Interactive shell for the transformation service.

    Every ``do_<name>`` method is a shell command; its docstring is the help
    text shown by ``help`` / ``helpall``.  All output uses single-argument
    ``print(...)`` calls, which behave the same under Python 2 and Python 3.
    """

    def __init__(self):
        self.server = TransformationClient()
        self.indentSpace = 4
        cmd.Cmd.__init__(self)

    def printPair(self, key, value, separator=":"):
        """Print ``key : value`` with continuation lines indented under the key.

        Trailing whitespace of value is ignored, so a text without a final
        newline no longer loses its last line.
        """
        valueList = value.rstrip().split("\n")
        print("%s%s%s %s" % (key, " " * (self.indentSpace - len(key)), separator, valueList[0].strip()))
        for valueLine in valueList[1:]:
            print("%s %s" % (" " * self.indentSpace, valueLine.strip()))

    def _commandDoc(self, attribute):
        """Return the help text of a ``do_*`` attribute ('' when it has none)."""
        doc = getattr(self, attribute).__doc__
        # lstrip() instead of [1:]: works with and without a leading blank and
        # does not cut the first letter of the text.
        return doc.lstrip() if doc else ""

    def do_exit(self, args):
        """ Exits the shell.

            usage: exit
        """
        sys.exit(0)

    def do_quit(self, *args):
        """ Exits the shell.

            Usage: quit
        """
        sys.exit(0)

    def do_help(self, args):
        """ Default version of the help command

            Usage: help <command>
            OR use helpall to see description for all commands
        """
        cmd.Cmd.do_help(self, args)

    # overriting default help command
    def do_helpall(self, args):
        """ Shows help information

            Usage: helpall <command>
            If no command is specified all commands are shown
        """
        words = args.split()
        if not words:
            print("\nAvailable commands:\n")
            for attribute in sorted(dir(self)):
                if attribute.startswith("do_"):
                    self.printPair(attribute[3:], self._commandDoc(attribute))
                    print("")
            return
        command = words[0]
        if not hasattr(self, "do_%s" % command):
            print("There's no such %s command" % command)
            return
        self.printPair(command, self._commandDoc("do_%s" % command))

    def do_shell(self, args):
        """Execute a shell command

           usage !<shell_command>
        """
        res = shellCall(0, args)
        if not res['OK']:
            print(res['Message'])
            return
        returnCode, stdOut, stdErr = res['Value']
        if returnCode != 0:
            # A successful call with a failing command carries no 'Message'.
            print("Command exited with status %s" % returnCode)
        print("%s\n%s" % (stdOut, stdErr))

    def check_params(self, args, num):
        """Checks if the number of parameters correct"""
        argss = args.split()
        length = len(argss)
        if length < num:
            print("Error: Number of arguments provided %d less that required %d, please correct." % (length, num))
            return (False, length)
        return (argss, length)

    def check_id_or_name(self, id_or_name):
        """resolve name or Id by converting type of argument """
        if id_or_name.isdigit():
            return int(id_or_name)  # its look like id
        return id_or_name

    def do_setServer(self, args):
        """ Set the destination server

            usage: setServer serverURL
        """
        argss = args.split()
        if not argss:
            print("no server provided")
            return
        self.serverURL = argss[0]
        self.server.setServer(self.serverURL)

    ####################################################################
    #
    # These are the methods for transformation manipulation
    #

    def do_getall(self, args):
        """Get transformation details

           usage: getall [Status] [Status]
        """
        oTrans = Transformation()
        # serverURL only exists once setServer was used; otherwise keep the
        # server the Transformation object picked itself.
        serverURL = getattr(self, 'serverURL', None)
        if serverURL:
            oTrans.setServer(serverURL)
        oTrans.getTransformations(transStatus=args.split(), printOutput=True)

    def do_getStatus(self, args):
        """Get transformation details

           usage: getStatus <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.getTransformation(transName)
            if not res['OK']:
                print("Getting status of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s: %s" % (transName, res['Value']['Status']))

    def do_setStatus(self, args):
        """Set transformation status

           usage: setStatus <Status> <transName|ID>
           Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and status not supplied")
            return
        status = argss[0]
        for transName in argss[1:]:
            res = self.server.setTransformationParameter(transName, 'Status', status)
            if not res['OK']:
                print("Setting status of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s set to %s" % (transName, status))

    def do_start(self, args):
        """Start transformation

           usage: start <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'Status', 'Active')
            if not res['OK']:
                print("Setting Status of %s failed: %s" % (transName, res['Message']))
                continue
            res = self.server.setTransformationParameter(transName, 'AgentType', 'Automatic')
            if not res['OK']:
                print("Setting AgentType of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s started" % transName)

    def do_stop(self, args):
        """Stop transformation

           usage: stop <transID|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'AgentType', 'Manual')
            if not res['OK']:
                print("Stopping of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s stopped" % transName)

    def do_flush(self, args):
        """Flush transformation

           usage: flush <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'Status', 'Flush')
            if not res['OK']:
                print("Flushing of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s flushing" % transName)

    def do_get(self, args):
        """Get transformation definition

           usage: get <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
        else:
            # The body is shown by getBody; tolerate a definition without one.
            res['Value'].pop('Body', None)
            printDict(res['Value'])

    def do_getBody(self, args):
        """Get transformation body

           usage: getBody <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
        else:
            print(res['Value']['Body'])

    def do_getFileStat(self, args):
        """Get transformation file statistics

           usage: getFileStat <transName|ID>
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformationStats(transName)
        if not res['OK']:
            print("Failed to get statistics for %s: %s" % (transName, res['Message']))
        else:
            res['Value'].pop('Total', None)
            printDict(res['Value'])

    def do_modMask(self, args):
        """Modify transformation input definition

           usage: modMask <mask> <transName|ID>
        """
        argss = args.split()
        # Both the mask and at least one transformation are needed.
        if len(argss) < 2:
            print("no transformation supplied")
            return
        mask = argss[0]
        for transName in argss[1:]:
            res = self.server.setTransformationParameter(transName, "FileMask", mask)
            if not res['OK']:
                print("Failed to modify input file mask for %s: %s" % (transName, res['Message']))
            else:
                print("Updated %s filemask" % transName)

    def do_getFiles(self, args):
        """Get files for the transformation (optionally with a given status)

           usage: getFiles <transName|ID> [Status] [Status]
        """
        argss = args.split()
        if not argss:
            print("no transformation supplied")
            return
        transName = argss[0]
        status = argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return
        selectDict = {'TransformationID': res['Value']['TransformationID']}
        if status:
            selectDict['Status'] = status
        res = self.server.getTransformationFiles(condDict=selectDict)
        if not res['OK']:
            print("Failed to get transformation files: %s" % res['Message'])
        elif res['Value']:
            self._printFormattedDictList(res['Value'],
                                         ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                         'LFN', 'LFN')
        else:
            print("No files found")

    def do_getFileStatus(self, args):
        """Get file(s) status for the given transformation

           usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return
        selectDict = {'TransformationID': res['Value']['TransformationID']}
        res = self.server.getTransformationFiles(condDict=selectDict)
        if not res['OK']:
            print("Failed to get transformation files: %s" % res['Message'])
        elif res['Value']:
            filesList = [fileDict for fileDict in res['Value'] if fileDict['LFN'] in lfns]
            if filesList:
                self._printFormattedDictList(filesList,
                                             ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                             'LFN', 'LFN')
            else:
                print("Could not find any LFN in %s for transformation %s" % (lfns, transName))
        else:
            print("No files found")

    def do_setFileStatus(self, args):
        """Set file status for the given transformation

           usage: setFileStatus <transName|ID> <lfn> <status>
        """
        argss = args.split()
        if len(argss) != 3:
            print("transformation file and status not supplied")
            return
        transName, lfn, status = argss
        res = self.server.setFileStatusForTransformation(transName, status, [lfn])
        if not res['OK']:
            print("Failed to update file status: %s" % res['Message'])
        else:
            print("Updated file status to %s" % status)

    def do_resetFile(self, args):
        """Reset file status for the given transformation

           usage: resetFile <transName|ID> <lfn>
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file(s) not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.setFileStatusForTransformation(transName, 'Unused', lfns)
        if not res['OK']:
            print("Failed to reset file status: %s" % res['Message'])
        else:
            print("Updated file statuses to 'Unused' for %d file(s)" % len(lfns))

    ####################################################################
    #
    # These are the methods for file manipulation
    #

    @staticmethod
    def _fileInfo(se='IGNORED-SE', **extra):
        """Build the per-LFN dictionary sent to the service; only SE (and extras) matter."""
        info = {'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'}
        info.update(extra)
        return info

    def do_addDirectory(self, args):
        """Add files from the given catalog directory

           usage: addDirectory <directory> [directory]
        """
        argss = args.split()
        if not argss:
            print("no directory supplied")
            return
        for directory in argss:
            res = self.server.addDirectory(directory, force=True)
            if not res['OK']:
                print('failed to add directory %s: %s' % (directory, res['Message']))
            else:
                print('added %s files for %s' % (res['Value'], directory))

    def do_replicas(self, args):
        """ Get replicas for <path>

            usage: replicas <lfn> [lfn]
        """
        argss = args.split()
        if not argss:
            print("no files supplied")
            return
        res = self.server.getReplicas(argss)
        if not res['OK']:
            print("failed to get any replica information: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to get replica information for %s: %s" % (lfn, failed[lfn]))
        successful = res['Value']['Successful']
        for lfn in sorted(successful):
            outStr = "%s :" % lfn.ljust(100)
            for se in sorted(successful[lfn]):
                outStr = "%s %s" % (outStr, se.ljust(15))
            print(outStr)

    def do_addFile(self, args):
        """Add new files to transformation DB

           usage: addFile <lfn> [lfn]
        """
        argss = args.split()
        if not argss:
            print("no files supplied")
            return
        lfnDict = dict((lfn, self._fileInfo()) for lfn in argss)
        res = self.server.addFile(lfnDict, force=True)
        if not res['OK']:
            print("failed to add any files: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to add %s: %s" % (lfn, failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("added %s" % lfn)

    def do_removeFile(self, args):
        """Remove file from transformation DB

           usage: removeFile <lfn> [lfn]
        """
        argss = args.split()
        if not argss:
            print("no files supplied")
            return
        res = self.server.removeFile(argss)
        if not res['OK']:
            print("failed to remove any files: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to remove %s: %s" % (lfn, failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("removed %s" % lfn)

    def do_addReplica(self, args):
        """ Add new replica to the transformation DB

            usage: addReplica <lfn> <se>
        """
        argss = args.split()
        if len(argss) != 2:
            print("no file info supplied")
            return
        lfn, se = argss
        res = self.server.addReplica({lfn: self._fileInfo(se)}, force=True)
        if not res['OK']:
            print("failed to add replica: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to add replica: %s" % (failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("added %s" % lfn)

    def do_removeReplica(self, args):
        """Remove replica from the transformation DB

           usage: removeReplica <lfn> <se>
        """
        argss = args.split()
        if len(argss) != 2:
            print("no file info supplied")
            return
        lfn, se = argss
        res = self.server.removeReplica({lfn: self._fileInfo(se)})
        if not res['OK']:
            print("failed to remove replica: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to remove replica: %s" % (failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("removed %s" % lfn)

    def do_setReplicaStatus(self, args):
        """Set replica status, usually used to mark a replica Problematic

           usage: setReplicaStatus <lfn> <status> <se>
        """
        argss = args.split()
        if len(argss) < 3:
            print("no file info supplied")
            return
        lfn, status, se = argss[:3]
        res = self.server.setReplicaStatus({lfn: self._fileInfo(se, Status=status)})
        if not res['OK']:
            print("failed to set replica status: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to set replica status: %s" % (failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("updated replica status %s" % lfn)
class TaskManagerAgentBase(AgentModule):
    """Base agent that monitors and submits the tasks of transformations.

    The class itself calls, but does not define, ``getSubmittedTaskStatus``,
    ``getSubmittedFileStatus``, ``updateTransformationReservedTasks``,
    ``prepareTransformationTasks``, ``submitTransformationTasks`` and
    ``updateDBAfterTaskSubmission``, and it reads ``self.transType``; these are
    presumably supplied by the concrete agent (verify against subclasses).
    """

    #############################################################################
    def initialize(self):
        """Read the agent section, register the monitoring activity and create the client."""
        self.section = self.am_getOption("section")
        gMonitor.registerActivity("SubmittedTasks", "Automatically submitted tasks", "Transformation Monitoring",
                                  "Tasks", gMonitor.OP_ACUM)
        self.transClient = TransformationClient()
        return S_OK()

    #############################################################################
    def execute(self):
        """ The TaskManagerBase execution method.

        Runs, in order, each step whose enabling option is set; a failing step
        is logged as a warning and does not stop the following ones.
        """
        # (enabling option, what is disabled when unset, step, warning on failure)
        steps = (
            ('MonitorTasks', "Monitoring of tasks",
             self.updateTaskStatus, 'execute: Failed to update task states'),
            ('MonitorFiles', "Monitoring of files",
             self.updateFileStatus, 'execute: Failed to update file states'),
            ('CheckReserved', "Checking of reserved tasks",
             self.checkReservedTasks, 'execute: Failed to checked reserved tasks'),
            ('SubmitTasks', "Submission of tasks",
             self.submitTasks, 'execute: Failed to submit created tasks'),
        )
        for option, description, step, failure in steps:
            if not self.am_getOption(option, ''):
                gLogger.info("execute: %s is disabled." % description)
                gLogger.info("execute: To enable create the '%s' option" % option)
                continue
            res = step()
            if not res['OK']:
                gLogger.warn(failure, res['Message'])
        return S_OK()

    def _selectTransformations(self, transType=(), status=('Active', 'Completing'), agentType=('Automatic',)):
        """Get the transformations matching the given type, status and agent type.

        Empty selections are left out of the query.  The defaults are tuples so
        that no mutable object is shared between calls; they are sent as lists.

        :return: result of transClient.getTransformations
        """
        selectCond = {}
        for key, values in (('Status', status), ('Type', transType), ('AgentType', agentType)):
            if values:
                selectCond[key] = list(values) if isinstance(values, tuple) else values
        res = self.transClient.getTransformations(condDict=selectCond)
        if not res['OK']:
            gLogger.error("_selectTransformations: Failed to get transformations for selection.", res['Message'])
        elif not res['Value']:
            gLogger.info("_selectTransformations: No transformations found for selection.")
        else:
            gLogger.info("_selectTransformations: Obtained %d transformations for selection" % len(res['Value']))
        return res

    def updateTaskStatus(self):
        """Refresh the status of tasks not updated during the last 10 minutes."""
        gLogger.info("updateTaskStatus: Updating the Status of tasks")
        # Get the transformations to be updated
        status = self.am_getOption('UpdateTasksStatus', ['Active', 'Completing', 'Stopped'])
        res = self._selectTransformations(transType=self.transType, status=status, agentType=[])
        if not res['OK']:
            return res
        for transformation in res['Value']:
            transID = transformation['TransformationID']
            # Get the tasks which are in a UPDATE state
            updateStatus = self.am_getOption('TaskUpdateStatus',
                                             ['Checking', 'Deleted', 'Killed', 'Staging', 'Stalled', 'Matched',
                                              'Rescheduled', 'Completed', 'Submitted', 'Received', 'Waiting',
                                              'Running'])
            condDict = {"TransformationID": transID, "ExternalStatus": updateStatus}
            timeStamp = str(datetime.datetime.utcnow() - datetime.timedelta(minutes=10))
            res = self.transClient.getTransformationTasks(condDict=condDict, older=timeStamp,
                                                          timeStamp='LastUpdateTime')
            if not res['OK']:
                gLogger.error("updateTaskStatus: Failed to get tasks to update for transformation",
                              "%s %s" % (transID, res['Message']))
                continue
            if not res['Value']:
                gLogger.verbose("updateTaskStatus: No tasks found to update for transformation %s" % transID)
                continue
            res = self.getSubmittedTaskStatus(res['Value'])
            if not res['OK']:
                gLogger.error("updateTaskStatus: Failed to get updated task statuses for transformation",
                              "%s %s" % (transID, res['Message']))
                continue
            statusDict = res['Value']
            for newStatus in sorted(statusDict):
                taskIDs = statusDict[newStatus]
                gLogger.info("updateTaskStatus: Updating %d task(s) from transformation %d to %s" %
                             (len(taskIDs), transID, newStatus))
                res = self.transClient.setTaskStatus(transID, taskIDs, newStatus)
                if not res['OK']:
                    gLogger.error("updateTaskStatus: Failed to update task status for transformation",
                                  "%s %s" % (transID, res['Message']))
        gLogger.info("updateTaskStatus: Transformation task status update complete")
        return S_OK()

    def updateFileStatus(self):
        """Refresh the status of 'Assigned' files not updated during the last 10 minutes."""
        gLogger.info("updateFileStatus: Updating Status of task files")
        # Get the transformations to be updated
        status = self.am_getOption('UpdateFilesStatus', ['Active', 'Completing', 'Stopped'])
        res = self._selectTransformations(transType=self.transType, status=status, agentType=[])
        if not res['OK']:
            return res
        for transformation in res['Value']:
            transID = transformation['TransformationID']
            # Get the files which are in a UPDATE state
            # NOTE(review): this option is read but the query below always
            # selects 'Assigned' files; kept as in the original, confirm the intent.
            updateStatus = self.am_getOption('FileUpdateStatus', ['Submitted', 'Received', 'Waiting', 'Running'])
            timeStamp = str(datetime.datetime.utcnow() - datetime.timedelta(minutes=10))
            condDict = {'TransformationID': transID, 'Status': ['Assigned']}
            res = self.transClient.getTransformationFiles(condDict=condDict, older=timeStamp,
                                                          timeStamp='LastUpdate')
            if not res['OK']:
                gLogger.error("updateFileStatus: Failed to get transformation files to update.", res['Message'])
                continue
            if not res['Value']:
                gLogger.info("updateFileStatus: No files to be updated for transformation %s." % transID)
                continue
            res = self.getSubmittedFileStatus(res['Value'])
            if not res['OK']:
                gLogger.error("updateFileStatus: Failed to get updated file statuses for transformation",
                              "%s %s" % (transID, res['Message']))
                continue
            statusDict = res['Value']
            if not statusDict:
                gLogger.info("updateFileStatus: No file statuses to be updated for transformation %s." % transID)
                continue
            fileReport = FileReport(server=self.transClient.getServer())
            for lfn, fileStatus in statusDict.items():
                fileReport.setFileStatus(int(transID), lfn, fileStatus)
            res = fileReport.commit()
            if not res['OK']:
                gLogger.error("updateFileStatus: Failed to update file status for transformation",
                              "%s %s" % (transID, res['Message']))
            else:
                for fileStatus, update in res['Value'].items():
                    gLogger.info("updateFileStatus: Updated %s files for %s to %s." % (update, transID, fileStatus))
        gLogger.info("updateFileStatus: Transformation file status update complete")
        return S_OK()

    def checkReservedTasks(self):
        """Recover tasks left in 'Reserved' status for more than an hour (and less than a week).

        Tasks without an external counterpart go back to 'Created'; the others
        are marked 'Submitted' with the external ID that was found.
        """
        gLogger.info("checkReservedTasks: Checking Reserved tasks")
        # Get the transformations which should be checked
        status = self.am_getOption('CheckReservedStatus', ['Active', 'Completing', 'Stopped'])
        res = self._selectTransformations(transType=self.transType, status=status, agentType=[])
        if not res['OK']:
            return res
        for transformation in res['Value']:
            transID = transformation['TransformationID']
            # Select the tasks which have been in Reserved status for more than 1 hour for selected transformations
            condDict = {"TransformationID": transID, "ExternalStatus": 'Reserved'}
            time_stamp_older = str(datetime.datetime.utcnow() - datetime.timedelta(hours=1))
            time_stamp_newer = str(datetime.datetime.utcnow() - datetime.timedelta(days=7))
            res = self.transClient.getTransformationTasks(condDict=condDict, older=time_stamp_older,
                                                          newer=time_stamp_newer, timeStamp='LastUpdateTime')
            if not res['OK']:
                gLogger.error("checkReservedTasks: Failed to get Reserved tasks for transformation",
                              "%s %s" % (transID, res['Message']))
                continue
            if not res['Value']:
                gLogger.verbose("checkReservedTasks: No Reserved tasks found for transformation %s" % transID)
                continue
            res = self.updateTransformationReservedTasks(res['Value'])
            if not res['OK']:
                # This is a failure of the recovery, not an empty selection.
                gLogger.error("checkReservedTasks: Failed to recover Reserved tasks for transformation",
                              "%s %s" % (transID, res['Message']))
                continue
            noTasks = res['Value']['NoTasks']
            taskNameIDs = res['Value']['TaskNameIDs']
            # For the tasks with no associated request found re-set the status of the task in the transformationDB
            for taskName in noTasks:
                # Separate names: the task name carries its own transformation ID.
                taskTransID, taskID = taskName.split('_')
                gLogger.info("checkReservedTasks: Resetting status of %s to Created as no associated task found" %
                             (taskName))
                res = self.transClient.setTaskStatus(int(taskTransID), int(taskID), 'Created')
                if not res['OK']:
                    gLogger.warn("checkReservedTasks: Failed to update task status and ID after recovery",
                                 "%s %s" % (taskName, res['Message']))
            # For the tasks for which an associated request was found update the task details in the transformationDB
            for taskName, extTaskID in taskNameIDs.items():
                taskTransID, taskID = taskName.split('_')
                # The status written below is 'Submitted'; the message now says so.
                gLogger.info("checkReservedTasks: Resetting status of %s to Submitted with ID %s" %
                             (taskName, extTaskID))
                res = self.transClient.setTaskStatusAndWmsID(int(taskTransID), int(taskID), 'Submitted',
                                                             str(extTaskID))
                if not res['OK']:
                    gLogger.warn("checkReservedTasks: Failed to update task status and ID after recovery",
                                 "%s %s" % (taskName, res['Message']))
        gLogger.info("checkReservedTasks: Updating of reserved tasks complete")
        return S_OK()

    def submitTasks(self):
        """Prepare and submit up to 'TasksPerLoop' tasks per selected transformation.

        The owner and group are taken from the current proxy; a failure for one
        transformation is logged and the next one is processed.
        """
        gLogger.info("submitTasks: Submitting tasks for transformations")
        res = getProxyInfo(False, False)
        if not res['OK']:
            gLogger.error("submitTasks: Failed to determine credentials for submission", res['Message'])
            return res
        proxyInfo = res['Value']
        owner = proxyInfo['username']
        ownerGroup = proxyInfo['group']
        gLogger.info("submitTasks: Tasks will be submitted with the credentials %s:%s" % (owner, ownerGroup))
        # Get the transformations which should be submitted
        tasksPerLoop = self.am_getOption('TasksPerLoop', 50)
        status = self.am_getOption('SubmitStatus', ['Active', 'Completing'])
        res = self._selectTransformations(transType=self.transType, status=status)
        if not res['OK']:
            return res
        for transformation in res['Value']:
            transID = transformation['TransformationID']
            transBody = transformation['Body']
            res = self.transClient.getTasksToSubmit(transID, tasksPerLoop)
            if not res['OK']:
                gLogger.error("submitTasks: Failed to obtain tasks for transformation",
                              "%s %s" % (transID, res['Message']))
                continue
            tasks = res['Value']['JobDictionary']
            if not tasks:
                gLogger.verbose("submitTasks: No tasks found for submission for transformation %s" % transID)
                continue
            gLogger.info("submitTasks: Obtained %d tasks for submission for transformation %s" %
                         (len(tasks), transID))
            res = self.prepareTransformationTasks(transBody, tasks, owner, ownerGroup)
            if not res['OK']:
                gLogger.error("submitTasks: Failed to prepare tasks for transformation",
                              "%s %s" % (transID, res['Message']))
                continue
            res = self.submitTransformationTasks(res['Value'])
            if not res['OK']:
                gLogger.error("submitTasks: Failed to submit prepared tasks for transformation",
                              "%s %s" % (transID, res['Message']))
                continue
            res = self.updateDBAfterTaskSubmission(res['Value'])
            if not res['OK']:
                gLogger.error("submitTasks: Failed to update DB after task submission for transformation",
                              "%s %s" % (transID, res['Message']))
                continue
        gLogger.info("submitTasks: Submission of transformation tasks complete")
        return S_OK()
class Transformation(API):
    """Client-side handle on one transformation definition.

    Parameter values are cached in ``self.paramValues``. They are read and
    written through dynamically resolved ``getXxx()`` / ``setXxx(value)``
    accessors (see ``__getattr__``). Once ``self.exists`` is set and a
    TransformationID is known, changes are also forwarded to the client.
    """

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """c'tor

        :param transID: ID of a transformation to load; 0 starts a blank definition
        :param transClient: client to use; a TransformationClient() is created if not given
        :raises AttributeError: if the lookup fails with 'Transformation does not exist'
        """
        super(Transformation, self).__init__()

        # Accepted types for the typed parameters (checked in __setParam)
        self.paramTypes = {
            "TransformationID": [types.IntType, types.LongType],
            "TransformationName": types.StringTypes,
            "Status": types.StringTypes,
            "Description": types.StringTypes,
            "LongDescription": types.StringTypes,
            "Type": types.StringTypes,
            "Plugin": types.StringTypes,
            "AgentType": types.StringTypes,
            "FileMask": types.StringTypes,
            "TransformationGroup": types.StringTypes,
            "GroupSize": [types.IntType, types.LongType, types.FloatType],
            "InheritedFrom": [types.IntType, types.LongType],
            "Body": types.StringTypes,
            "MaxNumberOfTasks": [types.IntType, types.LongType],
            "EventsPerTask": [types.IntType, types.LongType],
        }
        # Initial values of the typed parameters
        self.paramValues = {
            "TransformationID": 0,
            "TransformationName": "",
            "Status": "New",
            "Description": "",
            "LongDescription": "",
            "Type": "",
            "Plugin": "Standard",
            "AgentType": "Manual",
            "FileMask": "",
            "TransformationGroup": "General",
            "GroupSize": 1,
            "InheritedFrom": 0,
            "Body": "",
            "MaxNumberOfTasks": 0,
            "EventsPerTask": 0,
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            "Transformations/AllowedPlugins", ["Broadcast", "Standard", "BySize", "ByShare"]
        )
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues["TransformationID"] = transID
            res = self.getTransformation()
            if res["OK"]:
                self.exists = True
            elif res["Message"] == "Transformation does not exist":
                raise AttributeError("TransformationID %d does not exist" % transID)
            else:
                self.paramValues["TransformationID"] = 0
                gLogger.fatal(
                    "Failed to get transformation from database", "%s @ %s" % (transID, self.transClient.serverURL)
                )

    def setServer(self, server):
        """Store the server URL and apply it to the client."""
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        """Return the stored server URL."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-run the constructor for transID, then apply the resulting server URL to the client.

        :return: S_OK()
        """
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Validate and set the 'TargetSE' parameter (see __setSE)."""
        return self.__setSE("TargetSE", seList)

    def setSourceSE(self, seList):
        """Validate and set the 'SourceSE' parameter (see __setSE)."""
        return self.__setSE("SourceSE", seList)

    def __setSE(self, se, seList):
        """Check the storage elements in seList and store them under parameter ``se``.

        :param se: parameter name to set
        :param seList: list of SE names, or a string holding either a Python
                       expression or a comma/space separated list of names
        :return: S_OK(), or the S_ERROR from the SE check / parameter update
        """
        if isinstance(seList, types.StringTypes):
            # SECURITY NOTE: eval() executes arbitrary expressions; only pass
            # trusted strings here. Kept for backward compatibility with callers
            # passing e.g. "['SE1', 'SE2']".
            try:
                seList = eval(seList)
            except Exception:
                # Not a Python expression: treat as a comma/space separated list
                seList = seList.replace(",", " ").split()
        res = self.__checkSEs(seList)
        if not res["OK"]:
            return res
        self.item_called = se
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Resolve unknown ``getXxx`` / ``setXxx`` attributes to the generic accessors.

        The parameter name (``Xxx``) is remembered in ``self.item_called`` for
        the accessor that is returned.

        :raises AttributeError: for any other unknown attribute
        """
        if name.startswith("get"):
            self.item_called = name[3:]
            return self.__getParam
        if name.startswith("set"):
            self.item_called = name[3:]
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Return the value selected by ``self.item_called``.

        'Available' gives the typed parameter names, 'Parameters' the whole
        value dictionary, any known parameter name gives its value.

        :raises AttributeError: if the parameter is unknown
        """
        if self.item_called == "Available":
            return S_OK(self.paramTypes.keys())
        if self.item_called == "Parameters":
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" % self.item_called)

    def __setParam(self, value):
        """Set the parameter selected by ``self.item_called`` to value.

        A changed value is sent to the server first when the transformation
        exists there, and only then stored locally.

        :raises TypeError: if a typed parameter receives a value of another type
        :return: S_OK(), or the failed result of the server update
        """
        name = self.item_called
        change = False
        if name in self.paramTypes:
            if self.paramValues[name] != value:
                if type(value) not in self.paramTypes[name]:
                    raise TypeError(
                        "%s %s %s expected one of %s" % (name, value, type(value), self.paramTypes[name])
                    )
                change = True
        elif name not in self.paramValues or self.paramValues[name] != value:
            # Untyped (additional) parameter: new, or different from the stored value
            change = True

        if not change:
            gLogger.verbose("No change of parameter %s required" % name)
        else:
            gLogger.verbose("Parameter %s to be changed" % name)
            transID = self.paramValues["TransformationID"]
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(transID, name, value)
                if not res["OK"]:
                    return res
            self.paramValues[name] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Fetch the parameters of the current TransformationID and apply each through its setter.

        :return: S_OK(parameter dict), S_ERROR() if no ID is known, or the failed client result
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res["Value"]
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            setter = getattr(self, setterName, None)
            # Test the resolved setter (the original tested the always-true name)
            if not callable(setter):
                gLogger.error("Unable to invoke setter %s, it isn't a member function" % setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Fetch the logging records of the current transformation, optionally printing them.

        :return: S_OK(list of records), S_ERROR() if no ID is known, or the failed client result
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res["Value"]
        if printOutput:
            self._printFormattedDictList(
                loggingList, ["Message", "MessageDate", "AuthorDN"], "MessageDate", "MessageDate"
            )
        return S_OK(loggingList)

    # The methods below forward to the client method of the same name, with the
    # current TransformationID as first argument (see __executeOperation).

    def extendTransformation(self, nTasks, printOutput=False):
        return self.__executeOperation("extendTransformation", nTasks, printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        """Forward 'cleanTransformation'; on success mark the local Status as 'Cleaned'."""
        res = self.__executeOperation("cleanTransformation", printOutput=printOutput)
        if res["OK"]:
            self.paramValues["Status"] = "Cleaned"
        return res

    def deleteTransformation(self, printOutput=False):
        """Forward 'deleteTransformation'; on success reset this object."""
        res = self.__executeOperation("deleteTransformation", printOutput=printOutput)
        if res["OK"]:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        return self.__executeOperation("addFilesToTransformation", lfns, printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        return self.__executeOperation("setFileStatusForTransformation", status, lfns, printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        return self.__executeOperation("getTransformationTaskStats", printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        return self.__executeOperation("getTransformationStats", printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        return self.__executeOperation("deleteTasks", taskMin, taskMax, printOutput=printOutput)

    def addTaskForTransformation(self, lfns=[], se="Unknown", printOutput=False):
        return self.__executeOperation("addTaskForTransformation", lfns, se, printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        return self.__executeOperation("setTaskStatus", taskID, status, printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Call ``self.transClient.<operation>(transID, *parms, **kwds)``.

        :param operation: name of the client method to call
        :param printOutput: (keyword, optional) pretty-print the result when true
        :return: the client result, or S_ERROR if no ID is known or the method is missing
        """
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        # Default avoids a KeyError when the keyword is omitted
        printOutput = kwds.pop("printOutput", False)
        fcn = getattr(self.transClient, operation, None)
        if not callable(fcn):
            return S_ERROR("Unable to invoke %s, it isn't a member function of TransformationClient" % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def __printSelection(self, res, outputFields, uniqueField, orderBy):
        """Print a successful selection result: the table, or a hint when there is nothing to show."""
        if not outputFields:
            gLogger.info("Available fields are: %s" % " ".join(res.get("ParameterNames", [])))
        elif not res["Value"]:
            gLogger.info("No tasks found for selection")
        else:
            self._printFormattedDictList(res["Value"], outputFields, uniqueField, orderBy)

    def getTransformationFiles(
        self,
        fileStatus=[],
        lfns=[],
        outputFields=[
            "FileID",
            "LFN",
            "Status",
            "TaskID",
            "TargetSE",
            "UsedSE",
            "ErrorCount",
            "InsertedTime",
            "LastUpdate",
        ],
        orderBy="FileID",
        printOutput=False,
    ):
        """Select files of the current transformation, optionally by status and/or LFN.

        :return: the client result
        """
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if fileStatus:
            condDict["Status"] = fileStatus
        if lfns:
            condDict["LFN"] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            self.__printSelection(res, outputFields, "FileID", orderBy)
        return res

    def getTransformationTasks(
        self,
        taskStatus=[],
        taskIDs=[],
        outputFields=[
            "TransformationID",
            "TaskID",
            "ExternalStatus",
            "ExternalID",
            "TargetSE",
            "CreationTime",
            "LastUpdateTime",
        ],
        orderBy="TaskID",
        printOutput=False,
    ):
        """Select tasks of the current transformation, optionally by external status and/or task ID.

        :return: the client result
        """
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if taskStatus:
            condDict["ExternalStatus"] = taskStatus
        if taskIDs:
            condDict["TaskID"] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            self.__printSelection(res, outputFields, "TaskID", orderBy)
        return res

    #############################################################################
    def getTransformations(
        self,
        transID=[],
        transStatus=[],
        outputFields=["TransformationID", "Status", "AgentType", "TransformationName", "CreationDate"],
        orderBy="TransformationID",
        printOutput=False,
    ):
        """Select transformations, optionally by ID and/or status.

        :return: the client result
        """
        condDict = {}
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            self.__printSelection(res, outputFields, "TransformationID", orderBy)
        return res

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create the transformation on the server from the locally held parameters.

        Typed parameters go into the creation call; every additional
        (untyped) parameter is then set individually.

        :return: S_OK(transID), or the failed check / creation result
        """
        res = self._checkCreation()
        if not res["OK"]:
            return self._errorReport(res, "Failed transformation sanity check")
        if printOutput:
            gLogger.info("Will attempt to create transformation with the following parameters")
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues["TransformationName"],
            self.paramValues["Description"],
            self.paramValues["LongDescription"],
            self.paramValues["Type"],
            self.paramValues["Plugin"],
            self.paramValues["AgentType"],
            self.paramValues["FileMask"],
            transformationGroup=self.paramValues["TransformationGroup"],
            groupSize=self.paramValues["GroupSize"],
            inheritedFrom=self.paramValues["InheritedFrom"],
            body=self.paramValues["Body"],
            maxTasks=self.paramValues["MaxNumberOfTasks"],
            eventsPerTask=self.paramValues["EventsPerTask"],
            addFiles=addFiles,
        )
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res["Value"]
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            if paramName not in self.paramTypes:
                res = self.transClient.setTransformationParameter(transID, paramName, paramValue)
                if not res["OK"]:
                    gLogger.error("Failed to add parameter", "%s %s" % (paramName, res["Message"]))
                    gLogger.notice("To add this parameter later please execute the following.")
                    gLogger.notice("oTransformation = Transformation(%d)" % transID)
                    gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Few checks before creation.

        Refuses when a TransformationID is already set, prompts on stdin for
        missing required parameters and for a replacement of an unsupported
        plugin.
        """
        if self.paramValues["TransformationID"]:
            gLogger.info("You are currently working with an active transformation definition.")
            gLogger.info("If you wish to create a new transformation reset the TransformationID.")
            gLogger.info("oTransformation.reset()")
            return S_ERROR()

        requiredParameters = ["TransformationName", "Description", "LongDescription", "Type"]
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info("%s is not defined for this transformation. This is required..." % parameter)
                self.paramValues[parameter] = raw_input("Please enter the value of " + parameter + " ")

        plugin = self.paramValues["Plugin"]
        if plugin and plugin not in self.supportedPlugins:
            gLogger.info("The selected Plugin (%s) is not known to the transformation agent." % plugin)
            res = self.__promptForParameter("Plugin", choices=self.supportedPlugins, default="Standard")
            if not res["OK"]:
                return res
            self.paramValues["Plugin"] = res["Value"]
        return S_OK()

    def _checkBySizePlugin(self):
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        """Force GroupSize to 1 when it is not positive."""
        groupSize = self.paramValues["GroupSize"]
        if groupSize <= 0:
            gLogger.info("The GroupSize was found to be less than zero. It has been set to 1.")
            res = self.setGroupSize(1)
            if not res["OK"]:
                return res
        return S_OK()

    def _checkBroadcastPlugin(self):
        """Prompt on stdin for SourceSE / TargetSE when they are missing or empty."""
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s" % (", ".join(["SourceSE", "TargetSE"]))
        )
        requiredParams = ["SourceSE", "TargetSE"]
        for requiredParam in requiredParams:
            if not self.paramValues.get(requiredParam):
                paramValue = raw_input("Please enter " + requiredParam + " ")
                setterName = "set%s" % requiredParam
                setter = getattr(self, setterName, None)
                if not callable(setter):
                    return S_ERROR("Unable to invoke %s, this function hasn't been implemented." % setterName)
                ses = paramValue.replace(",", " ").split()
                res = setter(ses)
                if not res["OK"]:
                    return res
        return S_OK()

    def __checkSEs(self, seList):
        """Check that every name in seList is a section of /Resources/StorageElements."""
        res = gConfig.getSections("/Resources/StorageElements")
        if not res["OK"]:
            return self._errorReport(res, "Failed to get possible StorageElements")
        missing = []
        for se in seList:
            if se not in res["Value"]:
                gLogger.error("StorageElement %s is not known" % se)
                missing.append(se)
        if missing:
            return S_ERROR("%d StorageElements not known" % len(missing))
        return S_OK()

    def __promptForParameter(self, parameter, choices=[], default="", insert=True):
        """Ask the user for a parameter value and, if ``insert``, apply it through its setter.

        :return: S_OK(value), or the failed prompt / setter result
        """
        res = promptUser("Please enter %s" % parameter, choices=choices, default=default)
        if not res["OK"]:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res["Value"]))
        paramValue = res["Value"]
        if insert:
            setterName = "set%s" % parameter
            setter = getattr(self, setterName, None)
            if not callable(setter):
                return S_ERROR("Unable to invoke %s, it isn't a member function of Transformation!" % setterName)
            res = setter(paramValue)
            if not res["OK"]:
                return res
        return S_OK(paramValue)
class Transformation(API):
    """Client-side handle on one transformation definition.

    Parameter values are cached in ``self.paramValues``. They are read and
    written through dynamically resolved ``getXxx()`` / ``setXxx(value)``
    accessors (see ``__getattr__``). Once ``self.exists`` is set and a
    TransformationID is known, changes are also forwarded to the client.
    """

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """c'tor

        :param transID: ID of a transformation to load; 0 starts a blank definition
        :param transClient: client to use; a TransformationClient() is created if not given
        :raises AttributeError: if the lookup fails with 'Transformation does not exist'
        """
        super(Transformation, self).__init__()

        # Accepted types for the typed parameters (checked in __setParam)
        self.paramTypes = {
            'TransformationID': [types.IntType, types.LongType],
            'TransformationName': types.StringTypes,
            'Status': types.StringTypes,
            'Description': types.StringTypes,
            'LongDescription': types.StringTypes,
            'Type': types.StringTypes,
            'Plugin': types.StringTypes,
            'AgentType': types.StringTypes,
            'FileMask': types.StringTypes,
            'TransformationGroup': types.StringTypes,
            'GroupSize': [types.IntType, types.LongType, types.FloatType],
            'InheritedFrom': [types.IntType, types.LongType],
            'Body': types.StringTypes,
            'MaxNumberOfTasks': [types.IntType, types.LongType],
            'EventsPerTask': [types.IntType, types.LongType],
        }
        # Initial values of the typed parameters
        self.paramValues = {
            'TransformationID': 0,
            'TransformationName': '',
            'Status': 'New',
            'Description': '',
            'LongDescription': '',
            'Type': '',
            'Plugin': 'Standard',
            'AgentType': 'Manual',
            'FileMask': '',
            'TransformationGroup': 'General',
            'GroupSize': 1,
            'InheritedFrom': 0,
            'Body': '',
            'MaxNumberOfTasks': 0,
            'EventsPerTask': 0,
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            'Transformations/AllowedPlugins',
            ['Broadcast', 'Standard', 'BySize', 'ByShare'])
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues['TransformationID'] = transID
            res = self.getTransformation()
            if res['OK']:
                self.exists = True
            elif res['Message'] == 'Transformation does not exist':
                raise AttributeError('TransformationID %d does not exist' % transID)
            else:
                self.paramValues['TransformationID'] = 0
                gLogger.fatal(
                    "Failed to get transformation from database",
                    "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        """Store the server URL and apply it to the client."""
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        """Return the stored server URL."""
        return self.serverURL

    def reset(self, transID=0):
        """Re-run the constructor for transID, then apply the resulting server URL to the client.

        :return: S_OK()
        """
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        """Validate and set the 'TargetSE' parameter (see __setSE)."""
        return self.__setSE('TargetSE', seList)

    def setSourceSE(self, seList):
        """Validate and set the 'SourceSE' parameter (see __setSE)."""
        return self.__setSE('SourceSE', seList)

    def setBody(self, body):
        """ check that the body is a string, or using the proper syntax for multiple operations

        :param body: transformation body, for example

          .. code :: python

            body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                     ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
                   ]

        :type body: string or list of tuples (or lists) of string and dictionaries
        :raises TypeError: If the structure is not as expected
        :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation`
                            is used
        :returns: S_OK, S_ERROR
        """
        self.item_called = "Body"
        if isinstance(body, basestring):
            return self.__setParam(body)
        if not isinstance(body, (list, tuple)):
            raise TypeError("Expected list or string, but %r is %s" % (body, type(body)))

        for tup in body:
            if not isinstance(tup, (tuple, list)):
                raise TypeError("Expected tuple or list, but %r is %s" % (tup, type(tup)))
            if len(tup) != 2:
                raise TypeError("Expected 2-tuple, but %r is length %d" % (tup, len(tup)))
            if not isinstance(tup[0], basestring):
                raise TypeError(
                    "Expected string, but first entry in tuple %r is %s" % (tup, type(tup[0])))
            if not isinstance(tup[1], dict):
                # Report the type of the offending (second) entry
                raise TypeError(
                    "Expected dictionary, but second entry in tuple %r is %s" % (tup, type(tup[1])))
            for par, val in tup[1].iteritems():
                if not isinstance(par, basestring):
                    raise TypeError(
                        "Expected string, but key in dictionary %r is %s" % (par, type(par)))
                if par not in Operation.ATTRIBUTE_NAMES:
                    raise ValueError("Unknown attribute for Operation: %s" % par)
                if not isinstance(val, (basestring, int, long, float, list, tuple, dict)):
                    raise TypeError("Cannot encode %r, in json" % (val,))
        # A structured body is stored as its JSON encoding
        return self.__setParam(json.dumps(body))

    def __setSE(self, seParam, seList):
        """Check the storage elements in seList and store them under parameter ``seParam``.

        :param seParam: parameter name to set
        :param seList: list/tuple/dict of SE names, or a string holding either a
                       Python expression or a comma/space separated list of names
        :return: S_OK(), S_ERROR for an unsupported argument type, or the
                 S_ERROR from the SE check / parameter update
        """
        if isinstance(seList, basestring):
            # SECURITY NOTE: eval() executes arbitrary expressions; only pass
            # trusted strings here. Kept for backward compatibility with callers
            # passing e.g. "['SE1', 'SE2']".
            try:
                seList = eval(seList)
            except Exception:
                # Not a Python expression: treat as a separated list. Splitting
                # on whitespace too drops the blanks that "A, B" would leave.
                seList = seList.replace(',', ' ').split()
        elif isinstance(seList, (list, dict, tuple)):
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        """Resolve unknown ``getXxx`` / ``setXxx`` attributes to the generic accessors.

        The parameter name (``Xxx``) is remembered in ``self.item_called`` for
        the accessor that is returned.

        :raises AttributeError: for any other unknown attribute
        """
        if name.startswith('get'):
            self.item_called = name[3:]
            return self.__getParam
        if name.startswith('set'):
            self.item_called = name[3:]
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        """Return the value selected by ``self.item_called``.

        'Available' gives the typed parameter names, 'Parameters' the whole
        value dictionary, any known parameter name gives its value.

        :raises AttributeError: if the parameter is unknown
        """
        if self.item_called == 'Available':
            return S_OK(self.paramTypes.keys())
        if self.item_called == 'Parameters':
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" % self.item_called)

    def __setParam(self, value):
        """Set the parameter selected by ``self.item_called`` to value.

        A changed value is sent to the server first when the transformation
        exists there, and only then stored locally.

        :raises TypeError: if a typed parameter receives a value of another type
        :return: S_OK(), or the failed result of the server update
        """
        name = self.item_called
        change = False
        if name in self.paramTypes:
            if self.paramValues[name] != value:
                if type(value) not in self.paramTypes[name]:
                    raise TypeError("%s %s %s expected one of %s" %
                                    (name, value, type(value), self.paramTypes[name]))
                change = True
        elif name not in self.paramValues or self.paramValues[name] != value:
            # Untyped (additional) parameter: new, or different from the stored value
            change = True

        if not change:
            gLogger.verbose("No change of parameter %s required" % name)
        else:
            gLogger.verbose("Parameter %s to be changed" % name)
            transID = self.paramValues['TransformationID']
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(transID, name, value)
                if not res['OK']:
                    return res
            self.paramValues[name] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        """Fetch the parameters of the current TransformationID and apply each through its setter.

        :return: S_OK(parameter dict), S_ERROR() if no ID is known, or the failed client result
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setterName = "set%s" % paramName
            setter = getattr(self, setterName, None)
            # Test the resolved setter (the original tested the always-true name)
            if not callable(setter):
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" % setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        """Fetch the logging records of the current transformation, optionally printing them.

        :return: S_OK(list of records), S_ERROR() if no ID is known, or the failed client result
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res['Value']
        if printOutput:
            self._printFormattedDictList(
                loggingList, ['Message', 'MessageDate', 'AuthorDN'],
                'MessageDate', 'MessageDate')
        return S_OK(loggingList)

    # The methods below forward to the client method of the same name, with the
    # current TransformationID as first argument (see __executeOperation).

    def extendTransformation(self, nTasks, printOutput=False):
        return self.__executeOperation('extendTransformation', nTasks, printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        """Forward 'cleanTransformation'; on success mark the local Status as 'Cleaned'."""
        res = self.__executeOperation('cleanTransformation', printOutput=printOutput)
        if res['OK']:
            self.paramValues['Status'] = 'Cleaned'
        return res

    def deleteTransformation(self, printOutput=False):
        """Forward 'deleteTransformation'; on success reset this object."""
        res = self.__executeOperation('deleteTransformation', printOutput=printOutput)
        if res['OK']:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        return self.__executeOperation('addFilesToTransformation', lfns, printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        return self.__executeOperation('setFileStatusForTransformation', status, lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        return self.__executeOperation('getTransformationTaskStats', printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        return self.__executeOperation('getTransformationStats', printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        return self.__executeOperation('deleteTasks', taskMin, taskMax, printOutput=printOutput)

    def addTaskForTransformation(self, lfns=[], se='Unknown', printOutput=False):
        return self.__executeOperation('addTaskForTransformation', lfns, se, printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        return self.__executeOperation('setTaskStatus', taskID, status, printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        """Call ``self.transClient.<operation>(transID, *parms, **kwds)``.

        :param operation: name of the client method to call
        :param printOutput: (keyword, optional) pretty-print the result when true
        :return: the client result, or S_ERROR if no ID is known or the method is missing
        """
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        # Default avoids a KeyError when the keyword is omitted
        printOutput = kwds.pop('printOutput', False)
        fcn = getattr(self.transClient, operation, None)
        if not callable(fcn):
            return S_ERROR(
                "Unable to invoke %s, it isn't a member function of TransformationClient" % operation)
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def __printSelection(self, res, outputFields, uniqueField, orderBy):
        """Print a successful selection result: the table, or a hint when there is nothing to show."""
        if not outputFields:
            gLogger.info("Available fields are: %s" % ' '.join(res.get('ParameterNames', [])))
        elif not res['Value']:
            gLogger.info("No tasks found for selection")
        else:
            self._printFormattedDictList(res['Value'], outputFields, uniqueField, orderBy)

    def getTransformationFiles(self,
                               fileStatus=[],
                               lfns=[],
                               outputFields=[
                                   'FileID', 'LFN', 'Status', 'TaskID',
                                   'TargetSE', 'UsedSE', 'ErrorCount',
                                   'InsertedTime', 'LastUpdate'
                               ],
                               orderBy='FileID',
                               printOutput=False):
        """Select files of the current transformation, optionally by status and/or LFN.

        :return: the client result
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            self.__printSelection(res, outputFields, 'FileID', orderBy)
        return res

    def getTransformationTasks(self,
                               taskStatus=[],
                               taskIDs=[],
                               outputFields=[
                                   'TransformationID', 'TaskID',
                                   'ExternalStatus', 'ExternalID', 'TargetSE',
                                   'CreationTime', 'LastUpdateTime'
                               ],
                               orderBy='TaskID',
                               printOutput=False):
        """Select tasks of the current transformation, optionally by external status and/or task ID.

        :return: the client result
        """
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            self.__printSelection(res, outputFields, 'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self,
                           transID=[],
                           transStatus=[],
                           outputFields=[
                               'TransformationID', 'Status', 'AgentType',
                               'TransformationName', 'CreationDate'
                           ],
                           orderBy='TransformationID',
                           printOutput=False):
        """Select transformations, optionally by ID and/or status.

        :return: the client result
        """
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            self.__printSelection(res, outputFields, 'TransformationID', orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """ gets the AuthorDN and username of the transformation from the uploaded proxy

        :return: S_OK({'username': ..., 'authorDN': ...}) or S_ERROR with the proxy lookup message
        """
        res = getProxyInfo()
        if not res['OK']:
            gLogger.error("Unable to get uploaded proxy Info %s " % res['Message'])
            return S_ERROR(res['Message'])
        return S_OK({'username': res['Value']['username'],
                     'authorDN': res['Value']['identity']})

    #############################################################################
    def getTransformationsByUser(self,
                                 authorDN="",
                                 userName="",
                                 transID=[],
                                 transStatus=[],
                                 outputFields=[
                                     'TransformationID', 'Status', 'AgentType',
                                     'TransformationName', 'CreationDate',
                                     'AuthorDN'
                                 ],
                                 orderBy='TransformationID',
                                 printOutput=False):
        """Select transformations by author DN, optionally also by ID and/or status.

        When authorDN is empty it is taken from the proxy; a non-empty
        userName must then match the user name found in the proxy.

        :return: the client result, or S_ERROR if the author cannot be resolved
        """
        condDict = {}
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res['OK']:
                gLogger.error(res['Message'])
                return S_ERROR(res['Message'])
            foundUserName = res['Value']['username']
            foundAuthor = res['Value']['authorDN']
            # If the username whom created the uploaded proxy is different than the provided username
            # report error and exit
            if not (userName == "" or userName == foundUserName):
                message = ("Couldn't resolve the authorDN for user '%s' from the uploaded proxy "
                           "(proxy created by '%s')" % (userName, foundUserName))
                gLogger.error(message)
                return S_ERROR(message)
            userName = foundUserName
            authorDN = foundAuthor
            gLogger.info(
                "Will list transformations created by user '%s' with status '%s'" %
                (userName, ', '.join(transStatus)))
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" %
                (authorDN, ', '.join(transStatus)))

        condDict['AuthorDN'] = authorDN
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            self.__printSelection(res, outputFields, 'TransformationID', orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Show the summary for a list of Transformations

        Fields starting with 'F' ('J') refers to files (jobs).
        Proc. stand for processed.

        :return: S_OK(list of summary dicts), the failed client result, or
                 S_ERROR when no record could be built
        """
        condDict = {'TransformationID': transID}
        orderby = []
        start = 0
        maxitems = len(transID)
        paramShowNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed',
                          'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting',
                          'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled']
        # Below, the header used for each field in the printing: short to fit in one line
        paramShowNamesShort = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.',
                               'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled']
        dictList = []

        result = self.transClient.getTransformationSummaryWeb(condDict, orderby, start, maxitems)
        if not result['OK']:
            self._prettyPrint(result)
            return result

        if result['Value']['TotalRecords'] > 0:
            try:
                paramNames = result['Value']['ParameterNames']
                for paramValues in result['Value']['Records']:
                    paramShowValues = [paramValues[paramNames.index(pname)] for pname in paramShowNames]
                    dictList.append(dict(zip(paramShowNamesShort, paramShowValues)))
            except Exception as x:
                # Reported through the logger, like every other message of this class
                gLogger.error('Exception %s ' % str(x))

        if not dictList:
            gLogger.error('No found transformations satisfying input condition')
            return S_ERROR('No found transformations satisfying input condition')

        # Called as a statement, as at every other call site in this class
        self._printFormattedDictList(dictList, paramShowNamesShort,
                                     paramShowNamesShort[0], paramShowNamesShort[0])
        return S_OK(dictList)

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Create the transformation on the server from the locally held parameters.

        Typed parameters go into the creation call; every additional
        (untyped) parameter is then set individually.

        :return: S_OK(transID), or the failed check / creation result
        """
        res = self._checkCreation()
        if not res['OK']:
            return self._errorReport(res, 'Failed transformation sanity check')
        if printOutput:
            gLogger.info("Will attempt to create transformation with the following parameters")
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues['TransformationName'],
            self.paramValues['Description'],
            self.paramValues['LongDescription'],
            self.paramValues['Type'],
            self.paramValues['Plugin'],
            self.paramValues['AgentType'],
            self.paramValues['FileMask'],
            transformationGroup=self.paramValues['TransformationGroup'],
            groupSize=self.paramValues['GroupSize'],
            inheritedFrom=self.paramValues['InheritedFrom'],
            body=self.paramValues['Body'],
            maxTasks=self.paramValues['MaxNumberOfTasks'],
            eventsPerTask=self.paramValues['EventsPerTask'],
            addFiles=addFiles)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res['Value']
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            if paramName not in self.paramTypes:
                res = self.transClient.setTransformationParameter(transID, paramName, paramValue)
                if not res['OK']:
                    gLogger.error("Failed to add parameter", "%s %s" % (paramName, res['Message']))
                    gLogger.notice("To add this parameter later please execute the following.")
                    gLogger.notice("oTransformation = Transformation(%d)" % transID)
                    gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """ Few checks

        Refuses when a TransformationID is already set, prompts on stdin for
        missing required parameters and for a replacement of an unsupported
        plugin.
        """
        if self.paramValues['TransformationID']:
            gLogger.info("You are currently working with an active transformation definition.")
            gLogger.info("If you wish to create a new transformation reset the TransformationID.")
            gLogger.info("oTransformation.reset()")
            return S_ERROR()

        requiredParameters = ['TransformationName', 'Description', 'LongDescription', 'Type']
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info("%s is not defined for this transformation. This is required..." % parameter)
                self.paramValues[parameter] = raw_input("Please enter the value of " + parameter + " ")

        plugin = self.paramValues['Plugin']
        if plugin and plugin not in self.supportedPlugins:
            gLogger.info("The selected Plugin (%s) is not known to the transformation agent." % plugin)
            res = self.__promptForParameter('Plugin',
                                            choices=self.supportedPlugins,
                                            default='Standard')
            if not res['OK']:
                return res
            self.paramValues['Plugin'] = res['Value']
        return S_OK()

    def _checkBySizePlugin(self):
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        """Force GroupSize to 1 when it is not positive."""
        groupSize = self.paramValues['GroupSize']
        if groupSize <= 0:
            gLogger.info("The GroupSize was found to be less than zero. It has been set to 1.")
            res = self.setGroupSize(1)
            if not res['OK']:
                return res
        return S_OK()

    def _checkBroadcastPlugin(self):
        """Prompt on stdin for SourceSE / TargetSE when they are missing or empty."""
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s" %
            (', '.join(['SourceSE', 'TargetSE'])))
        requiredParams = ['SourceSE', 'TargetSE']
        for requiredParam in requiredParams:
            if not self.paramValues.get(requiredParam):
                paramValue = raw_input("Please enter " + requiredParam + " ")
                setterName = "set%s" % requiredParam
                setter = getattr(self, setterName, None)
                if not callable(setter):
                    return S_ERROR(
                        "Unable to invoke %s, this function hasn't been implemented." % setterName)
                ses = paramValue.replace(',', ' ').split()
                res = setter(ses)
                if not res['OK']:
                    return res
        return S_OK()

    def __checkSEs(self, seList):
        """Check that every name in seList is a section of /Resources/StorageElements."""
        res = gConfig.getSections('/Resources/StorageElements')
        if not res['OK']:
            return self._errorReport(res, 'Failed to get possible StorageElements')
        missing = set(seList) - set(res['Value'])
        if missing:
            for se in missing:
                gLogger.error("StorageElement %s is not known" % se)
            return S_ERROR("%d StorageElements not known" % len(missing))
        return S_OK()

    def __promptForParameter(self, parameter, choices=[], default='', insert=True):
        """Ask the user for a parameter value and, if ``insert``, apply it through its setter.

        :return: S_OK(value), or the failed prompt / setter result
        """
        res = promptUser("Please enter %s" % parameter, choices=choices, default=default)
        if not res['OK']:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res['Value']))
        paramValue = res['Value']
        if insert:
            setterName = "set%s" % parameter
            setter = getattr(self, setterName, None)
            if not callable(setter):
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!" % setterName)
            res = setter(paramValue)
            if not res['OK']:
                return res
        return S_OK(paramValue)
class TransformationCLI(cmd.Cmd, API):
    """Interactive shell for inspecting and steering transformations.

    Every ``do_<name>`` method implements the shell command ``<name>``; its
    docstring is the help text printed by ``help`` and ``helpall``.  All
    commands receive the rest of the command line as a single string and
    report their outcome on stdout; none of them raises on bad user input.
    """

    def __init__(self):
        self.server = TransformationClient()
        self.indentSpace = 4
        cmd.Cmd.__init__(self)
        API.__init__(self)

    ####################################################################
    #
    # Private helpers (name-mangled so they cannot clash with the bases)
    #

    @staticmethod
    def __splitArgs(args, minArgs, errorMsg):
        """Split ``args`` on whitespace.

        Prints ``errorMsg`` and returns None when fewer than ``minArgs``
        tokens were supplied, otherwise returns the token list.
        """
        argss = args.split()
        if len(argss) < minArgs:
            print(errorMsg)
            return None
        return argss

    @staticmethod
    def __fileInfo(se='IGNORED-SE', status=None):
        """Build the per-LFN dictionary sent to the file/replica calls."""
        info = {'PFN': 'IGNORED-PFN',
                'SE': se,
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'}
        if status is not None:
            info['Status'] = status
        return info

    @staticmethod
    def __reportReset(res, lfns):
        """Print the outcome of a 'reset to Unused' request."""
        if not res['OK']:
            print("Failed to reset file status: %s" % res['Message'])
            return
        # Only a non-empty 'Failed' entry means something went wrong.
        failed = res['Value'].get('Failed')
        if failed:
            print("Could not reset some files: ")
            for lfn, reason in failed.items():
                print("%s %s" % (lfn, reason))
        else:
            print("Updated file statuses to 'Unused' for %d file(s)" % len(lfns))

    ####################################################################
    #
    # Generic shell commands
    #

    def printPair(self, key, value, separator=":"):
        """Print ``key<pad><separator> value``; extra value lines are indented.

        Each line of ``value`` is stripped.  Trailing blank lines (typically
        the closing line of a docstring) are dropped, but a last line that
        carries text is kept.
        """
        valueList = [line.strip() for line in value.split("\n")]
        while len(valueList) > 1 and not valueList[-1]:
            valueList.pop()
        padding = " " * (self.indentSpace - len(key))
        print("%s%s%s %s" % (key, padding, separator, valueList[0]))
        for valueLine in valueList[1:]:
            print("%s %s" % (" " * self.indentSpace, valueLine))

    def do_exit(self, args):
        """Exits the shell.
        usage: exit
        """
        sys.exit(0)

    def do_quit(self, *args):
        """Exits the shell.
        Usage: quit
        """
        sys.exit(0)

    def do_help(self, args):
        """Default version of the help command
        Usage: help <command>
        OR use helpall to see description for all commands
        """
        cmd.Cmd.do_help(self, args)

    # overriding default help command
    def do_helpall(self, args):
        """Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
        """
        words = args.split()
        if not words:
            print("\nAvailable commands:\n")
            for attribute in sorted(dir(self)):
                if attribute.startswith("do_"):
                    # A command without docstring is listed with empty help
                    doc = getattr(self, attribute).__doc__ or ""
                    self.printPair(attribute[3:], doc.strip())
                    print("")
            return
        command = words[0]
        try:
            obj = getattr(self, "do_%s" % command)
        except AttributeError:
            print("There's no such %s command" % command)
            return
        self.printPair(command, (obj.__doc__ or "").strip())

    def do_shell(self, args):
        """Execute a shell command
        usage !<shell_command>
        """
        res = shellCall(0, args)
        if not res['OK']:
            print(res['Message'])
            return
        returnCode, stdOut, stdErr = res['Value']
        if returnCode != 0:
            # The call itself succeeded, so there is no 'Message' to show:
            # report the exit status together with whatever was captured.
            print("Command exited with status %s" % returnCode)
        print("%s\n%s" % (stdOut, stdErr))

    def check_params(self, args, num):
        """Checks if the number of parameters correct

        Returns ``(argList, count)``, or ``(False, count)`` when fewer than
        ``num`` arguments were given.
        """
        argss = args.split()
        length = len(argss)
        if length < num:
            print("Error: Number of arguments provided %d less that required %d, please correct." % (length, num))
            return (False, length)
        return (argss, length)

    def check_id_or_name(self, id_or_name):
        """resolve name or Id by converting type of argument"""
        if id_or_name.isdigit():
            return long(id_or_name)  # it looks like an id
        return id_or_name

    ####################################################################
    #
    # These are the methods for transformation manipulation
    #

    def do_getall(self, args):
        """Get transformation details
        usage: getall [Status] [Status]
        """
        oTrans = Transformation()
        oTrans.getTransformations(transStatus=args.split(), printOutput=True)

    def do_getAllByUser(self, args):
        """Get all transformations created by a given user

        The first argument is the authorDN or username. The authorDN
        is preferred: it need to be inside quotes because contains
        white spaces. Only authorDN should be quoted.

        When the username is provided instead, the authorDN is retrieved
        from the uploaded proxy, so that the retrieved transformations are
        those created by the user who uploaded that proxy: that user could
        be different that the username provided to the function.

        usage: getAllByUser authorDN or username [Status] [Status]
        """
        quotes = ("'", '"')
        notQuotedMsg = "AuthorDN need to be quoted (just quote that argument)"
        oTrans = Transformation()
        argss = args.split()
        username = ""
        author = ""
        status = []
        if not argss:
            print(self.do_getAllByUser.__doc__)
            return

        # An unquoted DN would be cut at its first white space
        if '=' in argss[0] and argss[0][0] not in quotes:
            print(notQuotedMsg)
            return

        if argss[0][0] in quotes:
            # authorDN given: glue the tokens back together up to the
            # closing quote
            author = argss[0]
            status_idx = 1
            # A DN without white space is already complete in its first
            # token; consuming more tokens would eat the status arguments
            if len(author) < 2 or author[-1] not in quotes:
                for arg in argss[1:]:
                    author += ' ' + arg
                    status_idx += 1
                    if arg[-1] in quotes:
                        break
            # At this point we should have something like 'author'
            if len(author) < 2 or author[0] not in quotes or author[-1] not in quotes:
                print(notQuotedMsg)
                return
            author = author[1:-1]  # throw away the quotes
            # the rest are the requested status
            status = argss[status_idx:]
        else:
            # username given
            username = argss[0]
            status = argss[1:]

        oTrans.getTransformationsByUser(authorDN=author,
                                        userName=username,
                                        transStatus=status,
                                        printOutput=True)

    def do_summaryTransformations(self, args):
        """Show the summary for a list of Transformations

        Fields starting with 'F' ('J') refers to files (jobs).
        Proc. stand for processed.

        Usage: summaryTransformations <ProdID> [<ProdID> ...]
        """
        argss = args.split()
        if not argss:
            print(self.do_summaryTransformations.__doc__)
            return
        oTrans = Transformation()
        oTrans.getSummaryTransformations(transID=argss)

    def do_getStatus(self, args):
        """Get transformation details
        usage: getStatus <transName|ID>
        """
        argss = self.__splitArgs(args, 1, "no transformation supplied")
        if argss is None:
            return
        for transName in argss:
            res = self.server.getTransformation(transName)
            if not res['OK']:
                print("Getting status of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s: %s" % (transName, res['Value']['Status']))

    def do_setStatus(self, args):
        """Set transformation status
        usage: setStatus <Status> <transName|ID>
        Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
        """
        argss = self.__splitArgs(args, 2, "transformation and status not supplied")
        if argss is None:
            return
        status = argss[0]
        for transName in argss[1:]:
            res = self.server.setTransformationParameter(transName, 'Status', status)
            if not res['OK']:
                print("Setting status of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s set to %s" % (transName, status))

    def do_start(self, args):
        """Start transformation
        usage: start <transName|ID>
        """
        argss = self.__splitArgs(args, 1, "no transformation supplied")
        if argss is None:
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'Status', 'Active')
            if not res['OK']:
                print("Setting Status of %s failed: %s" % (transName, res['Message']))
                continue
            res = self.server.setTransformationParameter(transName, 'AgentType', 'Automatic')
            if not res['OK']:
                print("Setting AgentType of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s started" % transName)

    def do_stop(self, args):
        """Stop transformation
        usage: stop <transName|ID>
        """
        argss = self.__splitArgs(args, 1, "no transformation supplied")
        if argss is None:
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'AgentType', 'Manual')
            if not res['OK']:
                print("Stopping of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s stopped" % transName)

    def do_flush(self, args):
        """Flush transformation
        usage: flush <transName|ID>
        """
        argss = self.__splitArgs(args, 1, "no transformation supplied")
        if argss is None:
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'Status', 'Flush')
            if not res['OK']:
                print("Flushing of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s flushing" % transName)

    def do_get(self, args):
        """Get transformation definition
        usage: get <transName|ID>
        """
        argss = self.__splitArgs(args, 1, "no transformation supplied")
        if argss is None:
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
        else:
            # The body is shown by getBody; tolerate its absence here
            res['Value'].pop('Body', None)
            printDict(res['Value'])

    def do_getBody(self, args):
        """Get transformation body
        usage: getBody <transName|ID>
        """
        argss = self.__splitArgs(args, 1, "no transformation supplied")
        if argss is None:
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
        else:
            print(res['Value']['Body'])

    def do_getFileStat(self, args):
        """Get transformation file statistics
        usage: getFileStat <transName|ID>
        """
        argss = self.__splitArgs(args, 1, "no transformation supplied")
        if argss is None:
            return
        transName = argss[0]
        res = self.server.getTransformationStats(transName)
        if not res['OK']:
            print("Failed to get statistics for %s: %s" % (transName, res['Message']))
        else:
            res['Value'].pop('Total', None)
            printDict(res['Value'])

    def do_modMask(self, args):
        """Modify transformation input definition
        usage: modMask <mask> <transName|ID>
        """
        # Both the mask and at least one transformation are needed
        argss = self.__splitArgs(args, 2, "mask and transformation not supplied")
        if argss is None:
            return
        mask = argss[0]
        for transName in argss[1:]:
            res = self.server.setTransformationParameter(transName, "FileMask", mask)
            if not res['OK']:
                print("Failed to modify input file mask for %s: %s" % (transName, res['Message']))
            else:
                print("Updated %s filemask" % transName)

    def do_getFiles(self, args):
        """Get files for the transformation (optionally with a given status)
        usage: getFiles <transName|ID> [Status] [Status]
        """
        argss = self.__splitArgs(args, 1, "no transformation supplied")
        if argss is None:
            return
        transName = argss[0]
        status = argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return
        selectDict = {'TransformationID': res['Value']['TransformationID']}
        if status:
            selectDict['Status'] = status
        res = self.server.getTransformationFiles(condDict=selectDict)
        if not res['OK']:
            print("Failed to get transformation files: %s" % res['Message'])
        elif res['Value']:
            self._printFormattedDictList(res['Value'],
                                         ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                         'LFN', 'LFN')
        else:
            print("No files found")

    def do_getFileStatus(self, args):
        """Get file(s) status for the given transformation
        usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
        """
        argss = self.__splitArgs(args, 2, "transformation and file not supplied")
        if argss is None:
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return
        selectDict = {'TransformationID': res['Value']['TransformationID']}
        res = self.server.getTransformationFiles(condDict=selectDict)
        if not res['OK']:
            print("Failed to get transformation files: %s" % res['Message'])
        elif res['Value']:
            wanted = set(lfns)
            filesList = [fileDict for fileDict in res['Value'] if fileDict['LFN'] in wanted]
            if filesList:
                self._printFormattedDictList(filesList,
                                             ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                             'LFN', 'LFN')
            else:
                print("Could not find any LFN in %s for transformation %s" % (lfns, transName))
        else:
            print("No files found")

    def do_getOutputFiles(self, args):
        """Get output files for the transformation
        usage: getOutputFiles <transName|ID>
        """
        argss = self.__splitArgs(args, 1, "no transformation supplied")
        if argss is None:
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
            return
        fc = FileCatalog()
        res = fc.findFilesByMetadata({'ProdID': transName})
        if not res['OK']:
            print(res['Message'])
            return
        if not res['Value']:
            # transName may be a name rather than a number: do not int() it
            print('No output files yet for transformation %s' % transName)
            return
        for lfn in res['Value']:
            print(lfn)

    def do_getInputDataQuery(self, args):
        """Get input data query for the transformation
        usage: getInputDataQuery <transName|ID>
        """
        argss = self.__splitArgs(args, 1, "no transformation supplied")
        if argss is None:
            return
        transName = argss[0]
        res = self.server.getTransformationInputDataQuery(transName)
        if not res['OK']:
            print("Failed to get transformation input data query: %s" % res['Message'])
        else:
            print(res['Value'])

    def do_setFileStatus(self, args):
        """Set file status for the given transformation
        usage: setFileStatus <transName|ID> <lfn> <status>
        """
        argss = args.split()
        if len(argss) != 3:
            print("transformation file and status not supplied")
            return
        transName, lfn, status = argss
        res = self.server.setFileStatusForTransformation(transName, status, [lfn])
        if not res['OK']:
            print("Failed to update file status: %s" % res['Message'])
        else:
            print("Updated file status to %s" % status)

    def do_resetFile(self, args):
        """Reset file status for the given transformation
        usage: resetFile <transName|ID> <lfns>
        """
        argss = self.__splitArgs(args, 2, "transformation and file(s) not supplied")
        if argss is None:
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.setFileStatusForTransformation(transName, 'Unused', lfns)
        self.__reportReset(res, lfns)

    def do_resetProcessedFile(self, args):
        """Reset file status for the given transformation, forcing the change
        usage: resetProcessedFile <transName|ID> <lfn>
        """
        argss = self.__splitArgs(args, 2, "transformation and file(s) not supplied")
        if argss is None:
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.setFileStatusForTransformation(transName, 'Unused', lfns, force=True)
        self.__reportReset(res, lfns)

    ####################################################################
    #
    # These are the methods for file manipulation
    #

    def do_addDirectory(self, args):
        """Add files from the given catalog directory
        usage: addDirectory <directory> [directory]
        """
        argss = self.__splitArgs(args, 1, "no directory supplied")
        if argss is None:
            return
        for directory in argss:
            res = self.server.addDirectory(directory, force=True)
            if not res['OK']:
                print('failed to add directory %s: %s' % (directory, res['Message']))
            else:
                print('added %s files for %s' % (res['Value'], directory))

    def do_replicas(self, args):
        """Get replicas for <path>
        usage: replicas <lfn> [lfn]
        """
        argss = self.__splitArgs(args, 1, "no files supplied")
        if argss is None:
            return
        res = self.server.getReplicas(argss)
        if not res['OK']:
            print("failed to get any replica information: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to get replica information for %s: %s" % (lfn, failed[lfn]))
        successful = res['Value']['Successful']
        for lfn in sorted(successful):
            columns = ["%s :" % lfn.ljust(100)]
            columns.extend(se.ljust(15) for se in sorted(successful[lfn]))
            print(" ".join(columns))

    def do_addFile(self, args):
        """Add new files to transformation DB
        usage: addFile <lfn> [lfn]
        """
        argss = self.__splitArgs(args, 1, "no files supplied")
        if argss is None:
            return
        lfnDict = dict((lfn, self.__fileInfo()) for lfn in argss)
        res = self.server.addFile(lfnDict, force=True)
        if not res['OK']:
            print("failed to add any files: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to add %s: %s" % (lfn, failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("added %s" % lfn)

    def do_removeFile(self, args):
        """Remove file from transformation DB
        usage: removeFile <lfn> [lfn]
        """
        argss = self.__splitArgs(args, 1, "no files supplied")
        if argss is None:
            return
        res = self.server.removeFile(argss)
        if not res['OK']:
            print("failed to remove any files: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to remove %s: %s" % (lfn, failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("removed %s" % lfn)

    def do_addReplica(self, args):
        """Add new replica to the transformation DB
        usage: addReplica <lfn> <se>
        """
        argss = args.split()
        if len(argss) != 2:
            print("no file info supplied")
            return
        lfn, se = argss
        res = self.server.addReplica({lfn: self.__fileInfo(se)}, force=True)
        if not res['OK']:
            print("failed to add replica: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to add replica: %s" % (failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("added %s" % lfn)

    def do_removeReplica(self, args):
        """Remove replica from the transformation DB
        usage: removeReplica <lfn> <se>
        """
        argss = args.split()
        if len(argss) != 2:
            print("no file info supplied")
            return
        lfn, se = argss
        res = self.server.removeReplica({lfn: self.__fileInfo(se)})
        if not res['OK']:
            print("failed to remove replica: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to remove replica: %s" % (failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("removed %s" % lfn)

    def do_setReplicaStatus(self, args):
        """Set replica status, usually used to mark a replica Problematic
        usage: setReplicaStatus <lfn> <status> <se>
        """
        argss = self.__splitArgs(args, 3, "no file info supplied")
        if argss is None:
            return
        lfn, status, se = argss[:3]
        res = self.server.setReplicaStatus({lfn: self.__fileInfo(se, status)})
        if not res['OK']:
            print("failed to set replica status: %s" % res['Message'])
            return
        failed = res['Value']['Failed']
        for lfn in sorted(failed):
            print("failed to set replica status: %s" % (failed[lfn]))
        for lfn in sorted(res['Value']['Successful']):
            print("updated replica status %s" % lfn)
class TransformationAgent(AgentModule):
    """Agent that turns the 'Unused' files of transformations into tasks.

    On every cycle it selects the transformations to handle, hands their
    'Unused' input files to the configured plug-in and registers the tasks
    the plug-in produces.
    """

    def initialize(self):
        """Read the agent options and create the clients used in each cycle."""
        self.pluginLocation = self.am_getOption('PluginLocation',
                                                'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.checkCatalog = self.am_getOption('CheckCatalog', 'yes')
        self.transformationStatus = self.am_getOption('transformationStatus',
                                                      ['Active', 'Completing', 'Flush'])
        self.maxFiles = self.am_getOption('MaxFiles', 5000)
        self.am_setOption('shifterProxy', 'ProductionManager')
        self.transDB = TransformationClient('TransformationDB')
        self.rm = ReplicaManager()
        # Number of 'Unused' files left per transformation after the last
        # cycle; used to skip transformations for which nothing changed.
        self.unusedFiles = {}
        return S_OK()

    def execute(self):
        """Get and process the transformations to be processed.

        Always returns S_OK: a failure on one transformation is logged and
        must not stop the agent cycle.
        """
        res = self.getTransformations()
        if not res['OK']:
            gLogger.info("execute: Failed to obtain transformations: %s" % res['Message'])
            return S_OK()
        # Process the transformations
        for transDict in res['Value']:
            transID = long(transDict['TransformationID'])
            gLogger.info("execute: Processing transformation %s." % transID)
            startTime = time.time()
            res = self.processTransformation(transDict)
            if not res['OK']:
                gLogger.info("execute: Failed to process transformation: %s" % res['Message'])
            else:
                gLogger.info("execute: Processed transformation in %.1f seconds" % (time.time() - startTime))
        return S_OK()

    def getTransformations(self):
        """Obtain the transformations to be executed.

        With the 'Transformation' option left at 'All', every transformation
        whose status is in ``self.transformationStatus`` is selected;
        otherwise only the named one.  Returns S_OK(list of dicts) or the
        failed result of the client call.
        """
        transName = self.am_getOption('Transformation', 'All')
        if transName == 'All':
            gLogger.info("getTransformations: Initializing general purpose agent.")
            res = self.transDB.getTransformations({'Status': self.transformationStatus}, extraParams=True)
            if not res['OK']:
                gLogger.error("getTransformations: Failed to get transformations: %s" % res['Message'])
                return res
            transformations = res['Value']
            gLogger.info("getTransformations: Obtained %d transformations to process" % len(transformations))
        else:
            gLogger.info("getTransformations: Initializing for transformation %s." % transName)
            res = self.transDB.getTransformation(transName, extraParams=True)
            if not res['OK']:
                gLogger.error("getTransformations: Failed to get transformation: %s." % res['Message'])
                return res
            transformations = [res['Value']]
        return S_OK(transformations)

    def processTransformation(self, transDict):
        """Create tasks for the 'Unused' files of one transformation.

        :param dict transDict: transformation description; the keys
            'TransformationID', 'Status' and 'Type' are read here, 'Plugin'
            is optional (defaults to 'Standard').
        :return: S_OK() or the failed result of the step that went wrong.
        """
        transID = transDict['TransformationID']
        isFlush = transDict['Status'] == 'Flush'

        # First get the LFNs associated to the transformation
        res = self.transDB.getTransformationFiles(condDict={'TransformationID': transID,
                                                            'Status': 'Unused'})
        if not res['OK']:
            gLogger.error("processTransformation: Failed to obtain input data: %s." % res['Message'])
            return res
        transFiles = res['Value']
        lfns = res.get('LFNs')
        if lfns is None:
            # NOTE(review): fallback for results without the extra 'LFNs'
            # entry; assumes each file dictionary has an 'LFN' key - confirm.
            lfns = [fileDict['LFN'] for fileDict in transFiles]

        if not lfns:
            gLogger.info("processTransformation: No 'Unused' files found for transformation.")
            if isFlush:
                self.__resetFlushStatus(transID)
            return S_OK()

        # Check if something new happened
        if len(lfns) == self.unusedFiles.get(transID, 0) and not isFlush:
            gLogger.info("processTransformation: No new 'Unused' files found for transformation.")
            return S_OK()

        replicateOrRemove = transDict['Type'].lower() in ("replication", "removal")
        # Limit the number of LFNs to be considered for replication or
        # removal as they are treated individually
        if replicateOrRemove:
            lfns = lfns[:self.maxFiles]
        unusedFiles = len(lfns)

        # Check the data is available with replicas
        # NOTE(review): __getDataReplicas is not defined in the part of the
        # class visible here - confirm it exists elsewhere.
        res = self.__getDataReplicas(transID, lfns, active=not replicateOrRemove)
        if not res['OK']:
            gLogger.error("processTransformation: Failed to get data replicas: %s" % res['Message'])
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        plugin = transDict.get('Plugin') or 'Standard'
        gLogger.info("processTransformation: Processing transformation with '%s' plug-in." % plugin)
        res = self.__generatePluginObject(plugin)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.generateTasks()
        if not res['OK']:
            gLogger.error("processTransformation: Failed to generate tasks for transformation: %s" % res['Message'])
            return res
        tasks = res['Value']

        # Create the tasks
        allCreated = True
        created = 0
        for se, taskLfns in tasks:
            res = self.transDB.addTaskForTransformation(transID, taskLfns, se)
            if not res['OK']:
                gLogger.error("processTransformation: Failed to add task generated by plug-in: %s." % res['Message'])
                allCreated = False
            else:
                created += 1
                unusedFiles -= len(taskLfns)
        if created:
            gLogger.info("processTransformation: Successfully created %d tasks for transformation." % created)
        self.unusedFiles[transID] = unusedFiles

        # If this production is to Flush
        if isFlush and allCreated:
            self.__resetFlushStatus(transID)
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def __resetFlushStatus(self, transID):
        """Put a flushed transformation back to 'Active'; only logs the outcome."""
        res = self.transDB.setTransformationParameter(transID, 'Status', 'Active')
        if not res['OK']:
            gLogger.error("processTransformation: Failed to update transformation status to 'Active': %s." % res['Message'])
        else:
            gLogger.info("processTransformation: Updated transformation status to 'Active'.")

    def __generatePluginObject(self, plugin):
        """Instantiate the TransformationPlugin class for the given plug-in name.

        :return: S_OK(plug-in object), or S_ERROR with the reason when the
            module cannot be imported or the class cannot be created.
        """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(), ['TransformationPlugin'])
        except ImportError as e:
            gLogger.exception("__generatePluginObject: Failed to import 'TransformationPlugin' %s: %s" % (plugin, e))
            return S_ERROR("Failed to import 'TransformationPlugin' %s: %s" % (plugin, e))
        try:
            plugin_o = getattr(plugModule, 'TransformationPlugin')('%s' % plugin,
                                                                   transClient=self.transDB,
                                                                   replicaManager=self.rm)
            return S_OK(plugin_o)
        except AttributeError as e:
            gLogger.exception("__generatePluginObject: Failed to create %s(): %s." % (plugin, e))
            return S_ERROR("Failed to create %s(): %s." % (plugin, e))
class ConsistencyInspector(object): """ A class for handling some consistency checks """ def __init__(self, interactive=True, transClient=None, dm=None, fc=None, dic=None): """ c'tor interactive: Data Manager (True) or DIRAC Agente (False) transClient: TransformationClient() if None, else transClient params dm: DataManager() if None, else dm params fc: FileCatalog() if None, else fc params One object for every production/directoriesList... """ self.interactive = interactive self.transClient = TransformationClient( ) if transClient is None else transClient self.dm = dm if dm else DataManager() self.fc = fc if fc else FileCatalog() self.dic = dic if dic else DataIntegrityClient() self.dirac = Dirac() # Base elements from which to start the consistency checks self._prod = 0 self._bkQuery = None self._fileType = [] self._fileTypesExcluded = [] self._lfns = [] self.noLFC = False self.directories = [] # Accessory elements self.runsList = [] self.runStatus = None self.fromProd = None self.transType = '' self.cachedReplicas = {} self.prcdWithDesc = [] self.prcdWithoutDesc = [] self.prcdWithMultDesc = [] self.nonPrcdWithDesc = [] self.nonPrcdWithoutDesc = [] self.nonPrcdWithMultDesc = [] self.descForPrcdLFNs = [] self.descForNonPrcdLFNs = [] self.removedFiles = [] self.absentLFNsInFC = [] self.existLFNsNoSE = {} self.existLFNsBadReplicas = {} self.existLFNsBadFiles = {} self.existLFNsNotExisting = {} self.commonAncestors = {} self.multipleDescendants = {} self.ancestors = {} self._verbose = False def __logVerbose(self, msg, msg1=''): if self._verbose: newMsg = '[ConsistencyChecks] ' + ( '[%s] ' % str(self.prod)) if self.prod else '' # Add that prefix to all lines of the message newMsg1 = msg1.replace('\n', '\n' + newMsg) newMsg += msg.replace('\n', '\n' + newMsg) gLogger.notice(newMsg, newMsg1) else: gLogger.verbose(msg, msg1) ################################################################################ def checkFC2SE(self): repDict = self.compareChecksum(self.lfns) 
self.existLFNsNoSE = repDict['MissingReplica'] self.existLFNsNotExisting = repDict['MissingAllReplicas'] self.existLFNsBadReplicas = repDict['SomeReplicasCorrupted'] self.existLFNsBadFiles = repDict['AllReplicasCorrupted'] def getReplicasPresence(self, lfns): """ get the replicas using the standard FileCatalog.getReplicas() """ present = set() notPresent = set() chunkSize = 100 printProgress = (len(lfns) > chunkSize) startTime = time.time() self.__write( "Checking replicas for %d files%s" % (len(lfns), (' (chunks of %d)' % chunkSize) if printProgress else '... ')) for chunk in breakListIntoChunks(lfns, chunkSize): if printProgress: self.__write('.') for _ in xrange(1, 10): res = self.fc.getReplicas(chunk) if res['OK']: present.update(res['Value']['Successful']) self.cachedReplicas.update(res['Value']['Successful']) notPresent.update(res['Value']['Failed']) break else: time.sleep(0.1) self.__write(' (%.1f seconds)\n' % (time.time() - startTime)) if notPresent: self.__logVerbose("Files without replicas:", '\n'.join([''] + sorted(notPresent))) return list(present), list(notPresent) ################################################################################ def getReplicasPresenceFromDirectoryScan(self, lfns): """ Get replicas scanning the directories. Might be faster. 
""" dirs = {} present = [] notPresent = [] compare = True for lfn in lfns: dirN = os.path.dirname(lfn) if lfn == dirN + '/': compare = False dirs.setdefault(dirN, []).append(lfn) if compare: self.__write( "Checking File Catalog for %d files from %d directories " % (len(lfns), len(dirs))) else: self.__write("Getting files from %d directories " % len(dirs)) startTime = time.time() for dirN in sorted(dirs): startTime1 = time.time() self.__write('.') lfnsFound = self._getFilesFromDirectoryScan(dirN) gLogger.verbose("Obtained %d files in %.1f seconds" % (len(lfnsFound), time.time() - startTime1)) if compare: pr, notPr = self.__compareLFNLists(dirs[dirN], lfnsFound) notPresent += notPr present += pr else: present += lfnsFound self.__write(' (%.1f seconds)\n' % (time.time() - startTime)) gLogger.info("Found %d files with replicas and %d without" % (len(present), len(notPresent))) return present, notPresent ################################################################################ def __compareLFNLists(self, lfns, lfnsFound): """ return files in both lists and files in lfns and not in lfnsFound """ present = [] notPresent = lfns startTime = time.time() self.__logVerbose("Comparing list of %d LFNs with second list of %d" % (len(lfns), len(lfnsFound))) if lfnsFound: setLfns = set(lfns) setLfnsFound = set(lfnsFound) present = list(setLfns & setLfnsFound) notPresent = list(setLfns - setLfnsFound) self.__logVerbose("End of comparison: %.1f seconds" % (time.time() - startTime)) return present, notPresent def _getFilesFromDirectoryScan(self, dirs): """ calls dm.getFilesFromDirectory """ level = gLogger.getLevel() gLogger.setLevel('FATAL') res = self.dm.getFilesFromDirectory(dirs) gLogger.setLevel(level) if not res['OK']: if 'No such file or directory' not in res['Message']: gLogger.error( "Error getting files from directories %s:" % dirs, res['Message']) return [] if res['Value']: lfnsFound = res['Value'] else: lfnsFound = [] return lfnsFound 
################################################################################ def _getTSFiles(self): """ Helper function - get files from the TS """ selectDict = {'TransformationID': self.prod} if self._lfns: selectDict['LFN'] = self._lfns elif self.runStatus and self.fromProd: res = self.transClient.getTransformationRuns({ 'TransformationID': self.fromProd, 'Status': self.runStatus }) if not res['OK']: gLogger.error("Failed to get runs for transformation %d" % self.prod) else: if res['Value']: self.runsList.extend([ run['RunNumber'] for run in res['Value'] if run['RunNumber'] not in self.runsList ]) gLogger.notice("%d runs selected" % len(res['Value'])) elif not self.runsList: gLogger.notice("No runs selected, check completed") DIRAC.exit(0) if not self._lfns and self.runsList: selectDict['RunNumber'] = self.runsList res = self.transClient.getTransformation(self.prod) if not res['OK']: gLogger.error("Failed to find transformation %s" % self.prod) return [], [], [] status = res['Value']['Status'] if status not in ('Active', 'Stopped', 'Completed', 'Idle'): gLogger.notice( "Transformation %s in status %s, will not check if files are processed" % (self.prod, status)) processedLFNs = [] nonProcessedLFNs = [] nonProcessedStatuses = [] if self._lfns: processedLFNs = self._lfns else: res = self.transClient.getTransformationFiles(selectDict) if not res['OK']: gLogger.error( "Failed to get files for transformation %d" % self.prod, res['Message']) return [], [], [] else: processedLFNs = [ item['LFN'] for item in res['Value'] if item['Status'] == 'Processed' ] nonProcessedLFNs = [ item['LFN'] for item in res['Value'] if item['Status'] != 'Processed' ] nonProcessedStatuses = list( set(item['Status'] for item in res['Value'] if item['Status'] != 'Processed')) return processedLFNs, nonProcessedLFNs, nonProcessedStatuses def __getDirectories(self): """ get the directories where to look into (they are either given, or taken from the transformation ID """ if self.directories: 
directories = [] printout = False for directory in self.directories: if not directory.endswith('...'): directories.append(directory) else: printout = True topDir = os.path.dirname(directory) res = self.fc.listDirectory(topDir) if not res['OK']: return S_ERROR( errno.ENOENT, res['Message'] ) #DError(errno.ENOENT, res['Message'] ) else: matchDir = directory.split('...')[0] directories += [ d for d in res['Value']['Successful'].get( topDir, {}).get('SubDirs', []) if d.startswith(matchDir) ] if printout: gLogger.always('Expanded list of %d directories:\n%s' % (len(directories), '\n'.join(directories))) return directories else: return S_ERROR( errno.ENOENT, 'Need to specify the directories' ) #DError(errno.ENOENT, 'Need to specify the directories') ################################################################################ def __write(self, text): if self.interactive: sys.stdout.write(text) sys.stdout.flush() print text ################################################################################ def _selectByFileType(self, lfnDict, fileTypes=None, fileTypesExcluded=None): """ Select only those files from the values of lfnDict that have a certain type """ if not lfnDict: return {} if not fileTypes: fileTypes = self.fileType if not fileTypesExcluded: fileTypesExcluded = self.fileTypesExcluded else: fileTypesExcluded += [ ft for ft in self.fileTypesExcluded if ft not in fileTypesExcluded ] # lfnDict is a dictionary of dictionaries including the metadata, create a deep copy to get modified ancDict = dict(lfnDict) if fileTypes == ['']: fileTypes = [] # and loop on the original dictionaries for ancestor in lfnDict: for desc in lfnDict[ancestor].keys(): ft = lfnDict[ancestor][desc]['FileType'] if ft in fileTypesExcluded or (fileTypes and ft not in fileTypes): ancDict[ancestor].pop(desc) if not len(ancDict[ancestor]): ancDict.pop(ancestor) return ancDict @staticmethod def _getFileTypesCount(lfnDict): """ return file types count """ ft_dict = {} for ancestor in lfnDict: 
t_dict = {} for desc in lfnDict[ancestor]: ft = lfnDict[ancestor][desc]['FileType'] t_dict[ft] = t_dict.setdefault(ft, 0) + 1 ft_dict[ancestor] = t_dict return ft_dict def __getLFNsFromFC(self): if not self.lfns: directories = [] for dirName in self.__getDirectories(): if not dirName.endswith('/'): dirName += '/' directories.append(dirName) present, notPresent = self.getReplicasPresenceFromDirectoryScan( directories) else: present, notPresent = self.getReplicasPresence(self.lfns) return present, notPresent def compareChecksum(self, lfns): """compare the checksum of the file in the FC and the checksum of the physical replicas. Returns a dictionary containing 3 sub-dictionaries: one with files with missing PFN, one with files with all replicas corrupted, and one with files with some replicas corrupted and at least one good replica """ retDict = { 'AllReplicasCorrupted': {}, 'SomeReplicasCorrupted': {}, 'MissingReplica': {}, 'MissingAllReplicas': {}, 'NoReplicas': {} } chunkSize = 100 replicas = {} setLfns = set(lfns) cachedLfns = setLfns & set(self.cachedReplicas) for lfn in cachedLfns: replicas[lfn] = self.cachedReplicas[lfn] lfnsLeft = list(setLfns - cachedLfns) if lfnsLeft: self.__write("Get replicas for %d files (chunks of %d): " % (len(lfnsLeft), chunkSize)) for lfnChunk in breakListIntoChunks(lfnsLeft, chunkSize): self.__write('.') replicasRes = self.fc.getReplicas(lfnChunk) if not replicasRes['OK']: gLogger.error("error: %s" % replicasRes['Message']) return S_ERROR(errno.ENOENT, "error: %s" % replicasRes['Message']) replicasRes = replicasRes['Value'] if replicasRes['Failed']: retDict['NoReplicas'].update(replicasRes['Failed']) replicas.update(replicasRes['Successful']) self.__write("Get FC metadata for %d files to be checked: " % len(lfns)) metadata = {} for lfnChunk in breakListIntoChunks(replicas.keys(), chunkSize): self.__write('.') res = self.fc.getFileMetadata(lfnChunk) if not res['OK']: return S_ERROR(errno.ENOENT, "error %s" % res['Message']) 
metadata.update(res['Value']['Successful']) gLogger.notice("Check existence and compare checksum file by file...") csDict = {} seFiles = {} # Reverse the LFN->SE dictionary nReps = 0 for lfn in replicas: csDict.setdefault(lfn, {})['LFCChecksum'] = metadata.get( lfn, {}).get('Checksum') for se in replicas[lfn]: seFiles.setdefault(se, []).append(lfn) nReps += 1 gLogger.notice('Getting checksum of %d replicas in %d SEs' % (nReps, len(seFiles))) checkSum = {} lfnNotExisting = {} lfnNoInfo = {} logLevel = gLogger.getLevel() gLogger.setLevel('FATAL') for num, se in enumerate(sorted(seFiles)): self.__write('\n%d. At %s (%d files): ' % (num, se, len(seFiles[se]))) oSe = StorageElement(se) notFound = 0 for surlChunk in breakListIntoChunks(seFiles[se], chunkSize): self.__write('.') metadata = oSe.getFileMetadata(surlChunk) if not metadata['OK']: gLogger.error( "Error: getFileMetadata returns %s. Ignore those replicas" % (metadata['Message'])) # Remove from list of replicas as we don't know whether it is OK or not for lfn in seFiles[se]: lfnNoInfo.setdefault(lfn, []).append(se) else: metadata = metadata['Value'] notFound += len(metadata['Failed']) for lfn in metadata['Failed']: lfnNotExisting.setdefault(lfn, []).append(se) for lfn in metadata['Successful']: checkSum.setdefault( lfn, {})[se] = metadata['Successful'][lfn]['Checksum'] if notFound: gLogger.error('%d files not found' % notFound) gLogger.setLevel(logLevel) gLogger.notice('Verifying checksum of %d files' % len(replicas)) for lfn in replicas: # get the lfn checksum from the LFC replicaDict = replicas[lfn] oneGoodReplica = False allGoodReplicas = True lfcChecksum = csDict[lfn].pop('LFCChecksum') for se in replicaDict: # If replica doesn't exist skip check if se in lfnNotExisting.get(lfn, []): allGoodReplicas = False continue if se in lfnNoInfo.get(lfn, []): # If there is no info, a priori it could be good oneGoodReplica = True continue # get the surls metadata and compare the checksum surlChecksum = checkSum.get(lfn, 
{}).get(se, '') if not surlChecksum or not compareAdler( lfcChecksum, surlChecksum): # if lfcChecksum does not match surlChecksum csDict[lfn][se] = {'PFNChecksum': surlChecksum} gLogger.info( "ERROR!! checksum mismatch at %s for LFN %s: LFC checksum: %s , PFN checksum : %s " % (se, lfn, lfcChecksum, surlChecksum)) allGoodReplicas = False else: oneGoodReplica = True if not oneGoodReplica: if lfn in lfnNotExisting: gLogger.info("=> All replicas are missing", lfn) retDict['MissingAllReplicas'][lfn] = 'All' else: gLogger.info("=> All replicas have bad checksum", lfn) retDict['AllReplicasCorrupted'][lfn] = csDict[lfn] elif not allGoodReplicas: if lfn in lfnNotExisting: gLogger.info("=> At least one replica missing", lfn) retDict['MissingReplica'][lfn] = lfnNotExisting[lfn] else: gLogger.info("=> At least one replica with good Checksum", lfn) retDict['SomeReplicasCorrupted'][lfn] = csDict[lfn] return S_OK(retDict) ################################################################################ # properties def set_prod(self, value): """ Setter """ if value: value = int(value) res = self.transClient.getTransformation(value, extraParams=False) if not res['OK']: S_ERROR( errno.ENOENT, "Couldn't find transformation %d: %s" % (value, res['Message'])) else: self.transType = res['Value']['Type'] if self.interactive: gLogger.info("Production %d has type %s" % (value, self.transType)) else: value = 0 self._prod = value def get_prod(self): """ Getter """ return self._prod prod = property(get_prod, set_prod) def set_fileType(self, value): """ Setter """ self._fileType = [ft.upper() for ft in value] def get_fileType(self): """ Getter """ return self._fileType fileType = property(get_fileType, set_fileType) def set_fileTypesExcluded(self, value): """ Setter """ self._fileTypesExcluded = [ft.upper() for ft in value] def get_fileTypesExcluded(self): """ Getter """ return self._fileTypesExcluded fileTypesExcluded = property(get_fileTypesExcluded, set_fileTypesExcluded) def 
set_lfns(self, value): """ Setter """ if isinstance(value, basestring): value = [value] value = [v.replace(' ', '').replace('//', '/') for v in value] self._lfns = value def get_lfns(self): """ Getter """ return self._lfns lfns = property(get_lfns, set_lfns) ############################################################################################### # # This part was backported from DataIntegrityClient # # # This section contains the specific methods for File Catalog->SE checks # def catalogDirectoryToSE(self, lfnDir): """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements. """ gLogger.info("-" * 40) gLogger.info("Performing the FC->SE check") gLogger.info("-" * 40) if isinstance(lfnDir, basestring): lfnDir = [lfnDir] res = self._getCatalogDirectoryContents(lfnDir) if not res['OK']: return res replicas = res['Value']['Replicas'] catalogMetadata = res['Value']['Metadata'] res = self.checkPhysicalFiles(replicas, catalogMetadata) if not res['OK']: return res resDict = { 'CatalogMetadata': catalogMetadata, 'CatalogReplicas': replicas } return S_OK(resDict) def catalogFileToSE(self, lfns): """ This obtains the replica and metadata information from the catalog and checks against the storage elements. 
""" gLogger.info("-" * 40) gLogger.info("Performing the FC->SE check") gLogger.info("-" * 40) if isinstance(lfns, basestring): lfns = [lfns] res = self._getCatalogMetadata(lfns) if not res['OK']: return res catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res['Value'] res = self._getCatalogReplicas(catalogMetadata.keys()) if not res['OK']: return res replicas, _zeroReplicaFiles = res['Value'] res = self.checkPhysicalFiles(replicas, catalogMetadata) if not res['OK']: return res resDict = { 'CatalogMetadata': catalogMetadata, 'CatalogReplicas': replicas } return S_OK(resDict) def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None): """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements. """ #FIXME: we better use the compareChecksum function instead of this one! or maybe directly checkFC2SE gLogger.info("-" * 40) gLogger.info("Performing the LFC->SE check") gLogger.info("-" * 40) seLfns = {} for lfn, replicaDict in replicas.iteritems(): for se, _url in replicaDict.iteritems(): if (ses) and (se not in ses): continue seLfns.setdefault(se, []).append(lfn) gLogger.info('%s %s' % ('Storage Element'.ljust(20), 'Replicas'.rjust(20))) for se in sorted(seLfns): files = len(seLfns[se]) gLogger.info('%s %s' % (se.ljust(20), str(files).rjust(20))) lfns = seLfns[se] sizeMismatch = [] res = self.__checkPhysicalFileMetadata(lfns, se) if not res['OK']: gLogger.error('Failed to get physical file metadata.', res['Message']) return res for lfn, metadata in res['Value'].iteritems(): if lfn in catalogMetadata: if metadata['Size'] != catalogMetadata[lfn][ 'Size']: # and ( metadata['Size'] != 0 ): sizeMismatch.append((lfn, 'deprecatedUrl', se, 'CatalogPFNSizeMismatch')) if sizeMismatch: self.dic.reportProblematicReplicas(sizeMismatch, se, 'CatalogPFNSizeMismatch') return S_OK() def __checkPhysicalFileMetadata(self, lfns, se): """ Check obtain the physical file metadata and check the files are 
available """ gLogger.info('Checking the integrity of %s physical files at %s' % (len(lfns), se)) res = StorageElement(se).getFileMetadata(lfns) if not res['OK']: gLogger.error('Failed to get metadata for lfns.', res['Message']) return res lfnMetadataDict = res['Value']['Successful'] # If the replicas are completely missing missingReplicas = [] for lfn, reason in res['Value']['Failed'].iteritems(): if re.search('File does not exist', reason): missingReplicas.append( (lfn, 'deprecatedUrl', se, 'PFNMissing')) if missingReplicas: self.dic.reportProblematicReplicas(missingReplicas, se, 'PFNMissing') lostReplicas = [] unavailableReplicas = [] zeroSizeReplicas = [] # If the files are not accessible for lfn, lfnMetadata in lfnMetadataDict.iteritems(): if lfnMetadata['Lost']: lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost')) if lfnMetadata['Unavailable']: unavailableReplicas.append( (lfn, 'deprecatedUrl', se, 'PFNUnavailable')) if not lfnMetadata['Size']: zeroSizeReplicas.append( (lfn, 'deprecatedUrl', se, 'PFNZeroSize')) if lostReplicas: self.dic.reportProblematicReplicas(lostReplicas, se, 'PFNLost') if unavailableReplicas: self.dic.reportProblematicReplicas(unavailableReplicas, se, 'PFNUnavailable') if zeroSizeReplicas: self.dic.reportProblematicReplicas(zeroSizeReplicas, se, 'PFNZeroSize') gLogger.info( 'Checking the integrity of physical files at %s complete' % se) return S_OK(lfnMetadataDict) ########################################################################## # # This section contains the specific methods for SE->File Catalog checks # def storageDirectoryToCatalog(self, lfnDir, storageElement): """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements """ gLogger.info("-" * 40) gLogger.info("Performing the SE->FC check at %s" % storageElement) gLogger.info("-" * 40) if isinstance(lfnDir, basestring): lfnDir = [lfnDir] res = 
self.getStorageDirectoryContents(lfnDir, storageElement) if not res['OK']: return res storageFileMetadata = res['Value'] if storageFileMetadata: return self.__checkCatalogForSEFiles(storageFileMetadata, storageElement) return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}}) def __checkCatalogForSEFiles(self, storageMetadata, storageElement): gLogger.info('Checking %s storage files exist in the catalog' % len(storageMetadata)) res = self.fc.getReplicas(storageMetadata) if not res['OK']: gLogger.error("Failed to get replicas for LFN", res['Message']) return res failedLfns = res['Value']['Failed'] successfulLfns = res['Value']['Successful'] notRegisteredLfns = [] for lfn in storageMetadata: if lfn in failedLfns: if 'No such file or directory' in failedLfns[lfn]: notRegisteredLfns.append( (lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered')) failedLfns.pop(lfn) elif storageElement not in successfulLfns[lfn]: notRegisteredLfns.append( (lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered')) if notRegisteredLfns: self.dic.reportProblematicReplicas(notRegisteredLfns, storageElement, 'LFNNotRegistered') if failedLfns: return S_ERROR(errno.ENOENT, 'Failed to obtain replicas') # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata res = self._getCatalogMetadata(storageMetadata) if not res['OK']: return res catalogMetadata, _missingCatalogFiles, _zeroSizeFiles = res['Value'] sizeMismatch = [] for lfn, lfnCatalogMetadata in catalogMetadata.iteritems(): lfnStorageMetadata = storageMetadata[lfn] if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and ( lfnStorageMetadata['Size'] != 0): sizeMismatch.append((lfn, 'deprecatedUrl', storageElement, 'CatalogPFNSizeMismatch')) if sizeMismatch: self.dic.reportProblematicReplicas(sizeMismatch, storageElement, 'CatalogPFNSizeMismatch') gLogger.info('Checking storage files exist in the catalog complete') resDict = { 'CatalogMetadata': catalogMetadata, 
'StorageMetadata': storageMetadata } return S_OK(resDict) def getStorageDirectoryContents(self, lfnDir, storageElement): """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element """ gLogger.info('Obtaining the contents for %s directories at %s' % (len(lfnDir), storageElement)) se = StorageElement(storageElement) res = se.exists(lfnDir) if not res['OK']: gLogger.error("Failed to obtain existance of directories", res['Message']) return res for directory, error in res['Value']['Failed'].iteritems(): gLogger.error('Failed to determine existance of directory', '%s %s' % (directory, error)) if res['Value']['Failed']: return S_ERROR(errno.ENOENT, 'Failed to determine existance of directory') directoryExists = res['Value']['Successful'] activeDirs = [] for directory in sorted(directoryExists): exists = directoryExists[directory] if exists: activeDirs.append(directory) allFiles = {} while len(activeDirs) > 0: currentDir = activeDirs[0] res = se.listDirectory(currentDir) activeDirs.remove(currentDir) if not res['OK']: gLogger.error('Failed to get directory contents', res['Message']) return res elif currentDir in res['Value']['Failed']: gLogger.error( 'Failed to get directory contents', '%s %s' % (currentDir, res['Value']['Failed'][currentDir])) return S_ERROR(errno.ENOENT, res['Value']['Failed'][currentDir]) else: dirContents = res['Value']['Successful'][currentDir] activeDirs.extend( se.getLFNFromURL(dirContents['SubDirs']).get( 'Value', {}).get('Successful', [])) fileURLMetadata = dirContents['Files'] fileMetadata = {} res = se.getLFNFromURL(fileURLMetadata) if not res['OK']: gLogger.error('Failed to get directory content LFNs', res['Message']) return res for url, error in res['Value']['Failed'].iteritems(): gLogger.error("Failed to get LFN for URL", "%s %s" % (url, error)) if res['Value']['Failed']: return S_ERROR(errno.ENOENT, "Failed to get LFNs for PFNs") urlLfns = res['Value']['Successful'] for urlLfn, lfn 
in urlLfns.iteritems(): fileMetadata[lfn] = fileURLMetadata[urlLfn] allFiles.update(fileMetadata) zeroSizeFiles = [] for lfn in sorted(allFiles): if os.path.basename(lfn) == 'dirac_directory': allFiles.pop(lfn) else: metadata = allFiles[lfn] if not metadata['Size']: zeroSizeFiles.append( (lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize')) if zeroSizeFiles: self.dic.reportProblematicReplicas(zeroSizeFiles, storageElement, 'PFNZeroSize') gLogger.info('Obtained at total of %s files for directories at %s' % (len(allFiles), storageElement)) return S_OK(allFiles) def _getCatalogDirectoryContents(self, lfnDirs): """ Obtain the contents of the supplied directory, recursively """ def _getDirectoryContent(directory): """ Inner function: recursively scan a directory, returns list of LFNs """ filesInDirectory = {} gLogger.debug("Examining %s" % directory) res = self.fc.listDirectory(directory) if not res['OK']: gLogger.error('Failed to get directory contents', res['Message']) return res if directory in res['Value']['Failed']: gLogger.error( 'Failed to get directory content', '%s %s' % (directory, res['Value']['Failed'][directory])) return S_ERROR('Failed to get directory content') if directory not in res['Value']['Successful']: return S_ERROR('Directory not existing?') # first, adding the files found in the current directory gLogger.debug( "Files in %s: %d" % (directory, len( res['Value']['Successful'][directory]['Files']))) filesInDirectory.update( res['Value']['Successful'][directory]['Files']) #then, looking for subDirectories content if res['Value']['Successful'][directory]['SubDirs']: for l_dir in res['Value']['Successful'][directory]['SubDirs']: #recursion here subDirContent = _getDirectoryContent(l_dir) if not subDirContent['OK']: return subDirContent else: filesInDirectory.update(subDirContent['Value']) return S_OK(filesInDirectory) gLogger.info('Obtaining the catalog contents for %d directories' % len(lfnDirs)) allFiles = {} for lfnDir in lfnDirs: dirContent = 
_getDirectoryContent(lfnDir) if not dirContent['OK']: return dirContent else: gLogger.debug("Content of directory %s: %d files" % (lfnDir, len(dirContent['Value']))) allFiles.update(dirContent['Value']) gLogger.debug("Content of directories examined: %d files" % len(allFiles)) replicas = self.fc.getReplicas(list(allFiles)) if not replicas['OK']: return replicas if replicas['Value']['Failed']: return S_ERROR("Failures in replicas discovery") return S_OK({ 'Metadata': allFiles, 'Replicas': replicas['Value']['Successful'] }) def _getCatalogReplicas(self, lfns): """ Obtain the file replicas from the catalog while checking that there are replicas """ gLogger.info('Obtaining the replicas for %s files' % len(lfns)) zeroReplicaFiles = [] res = self.fc.getReplicas(lfns, allStatus=True) if not res['OK']: gLogger.error('Failed to get catalog replicas', res['Message']) return res allReplicas = res['Value']['Successful'] for lfn, error in res['Value']['Failed'].iteritems(): if re.search('File has zero replicas', error): zeroReplicaFiles.append(lfn) gLogger.info('Obtaining the replicas for files complete') return S_OK((allReplicas, zeroReplicaFiles)) def _getCatalogMetadata(self, lfns): """ Obtain the file metadata from the catalog while checking they exist """ if not lfns: return S_OK({}) gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns)) missingCatalogFiles = [] zeroSizeFiles = [] res = self.fc.getFileMetadata(lfns) if not res['OK']: gLogger.error('Failed to get catalog metadata', res['Message']) return res allMetadata = res['Value']['Successful'] for lfn, error in res['Value']['Failed'].iteritems(): if re.search('No such file or directory', error): missingCatalogFiles.append(lfn) gLogger.info('Obtaining the catalog metadata complete') return S_OK((allMetadata, missingCatalogFiles, zeroSizeFiles))
class TransformationCLI(cmd.Cmd, API):
    """Interactive command shell for the transformation service.

    Every ``do_<name>`` method implements the shell command ``<name>``.
    The method docstrings are shown to the user by ``help`` and ``helpall``,
    so they are kept in a short "summary / usage" form without blank lines.
    """

    def __init__(self):
        self.server = TransformationClient()
        # Width of the key column used by printPair
        self.indentSpace = 4
        cmd.Cmd.__init__(self)
        API.__init__(self)

    def printPair(self, key, value, separator=":"):
        """Print a key followed by a (possibly multi-line) value.

        The first line of value goes next to the key, the following ones are
        indented. The last line of value is not printed (for a docstring this
        is the whitespace in front of the closing quotes).
        """
        valueList = value.split("\n")
        print("%s%s%s %s" % (key, " " * (self.indentSpace - len(key)), separator, valueList[0].strip()))
        for valueLine in valueList[1:-1]:
            print("%s %s" % (" " * self.indentSpace, valueLine.strip()))

    def do_exit(self, args):
        """Exits the shell.
        usage: exit
        """
        sys.exit(0)

    def do_quit(self, *args):
        """Exits the shell.
        Usage: quit
        """
        sys.exit(0)

    def do_help(self, args):
        """Default version of the help command
        Usage: help <command>
        OR use helpall to see description for all commands
        """
        cmd.Cmd.do_help(self, args)

    # overriding default help command
    def do_helpall(self, args):
        """Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
        """
        if len(args) == 0:
            print("\nAvailable commands:\n")
            attrList = dir(self)
            attrList.sort()
            for attribute in attrList:
                if attribute.find("do_") == 0:
                    # printPair strips each line, so the docstring is passed
                    # as is; commands without a docstring get an empty text
                    self.printPair(attribute[3:], getattr(self, attribute).__doc__ or "")
                    print("")
        else:
            command = args.split()[0].strip()
            try:
                obj = getattr(self, "do_%s" % command)
            except AttributeError:
                print("There's no such %s command" % command)
                return
            self.printPair(command, obj.__doc__ or "")

    def do_shell(self, args):
        """Execute a shell command
        usage: !<shell_command>
        """
        comm = args
        res = shellCall(0, comm)
        if not res['OK']:
            print(res['Message'])
            return
        # A successful call carries (returnCode, stdOut, stdErr) also when the
        # command itself failed; its output is the best diagnostic available
        _returnCode, stdOut, stdErr = res['Value']
        print("%s\n%s" % (stdOut, stdErr))

    def check_params(self, args, num):
        """Check that at least num parameters were given.

        :return: (list of parameters, count) or (False, count) when too few
        """
        argss = args.split()
        length = len(argss)
        if length < num:
            print("Error: Number of arguments provided %d less that required %d, please correct." % (length, num))
            return (False, length)
        return (argss, length)

    def check_id_or_name(self, id_or_name):
        """Resolve name or ID: an all-digit argument is converted to a number."""
        if id_or_name.isdigit():
            return long(id_or_name)  # it looks like an ID
        return id_or_name

    def do_setServer(self, args):
        """Set the destination server
        usage: setServer serverURL
        """
        argss = args.split()
        if len(argss) == 0:
            print("no server provided")
            return
        self.serverURL = argss[0]
        self.server.setServer(self.serverURL)

    ####################################################################
    #
    # These are the methods for transformation manipulation
    #

    def do_getall(self, args):
        """Get transformation details
        usage: getall [Status] [Status]
        """
        oTrans = Transformation()
        # serverURL only exists once setServer has been used
        serverURL = getattr(self, 'serverURL', None)
        if serverURL:
            oTrans.setServer(serverURL)
        oTrans.getTransformations(transStatus=args.split(), printOutput=True)

    def do_getStatus(self, args):
        """Get transformation details
        usage: getStatus <transName|ID>
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.getTransformation(transName)
            if not res['OK']:
                print("Getting status of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s: %s" % (transName, res['Value']['Status']))

    def do_setStatus(self, args):
        """Set transformation status
        usage: setStatus <Status> <transName|ID>
        Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
        """
        argss = args.split()
        if not len(argss) > 1:
            print("transformation and status not supplied")
            return
        status = argss[0]
        transNames = argss[1:]
        for transName in transNames:
            res = self.server.setTransformationParameter(transName, 'Status', status)
            if not res['OK']:
                print("Setting status of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s set to %s" % (transName, status))

    def do_start(self, args):
        """Start transformation
        usage: start <transName|ID>
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'Status', 'Active')
            if not res['OK']:
                print("Setting Status of %s failed: %s" % (transName, res['Message']))
            else:
                res = self.server.setTransformationParameter(transName, 'AgentType', 'Automatic')
                if not res['OK']:
                    print("Setting AgentType of %s failed: %s" % (transName, res['Message']))
                else:
                    print("%s started" % transName)

    def do_stop(self, args):
        """Stop transformation
        usage: stop <transName|ID>
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'AgentType', 'Manual')
            if not res['OK']:
                print("Stopping of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s stopped" % transName)

    def do_flush(self, args):
        """Flush transformation
        usage: flush <transName|ID>
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no transformation supplied")
            return
        for transName in argss:
            res = self.server.setTransformationParameter(transName, 'Status', 'Flush')
            if not res['OK']:
                print("Flushing of %s failed: %s" % (transName, res['Message']))
            else:
                print("%s flushing" % transName)

    def do_get(self, args):
        """Get transformation definition
        usage: get <transName|ID>
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
        else:
            # The body is shown by getBody; tolerate a definition without one
            res['Value'].pop('Body', None)
            printDict(res['Value'])

    def do_getBody(self, args):
        """Get transformation body
        usage: getBody <transName|ID>
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get %s: %s" % (transName, res['Message']))
        else:
            print(res['Value']['Body'])

    def do_getFileStat(self, args):
        """Get transformation file statistics
        usage: getFileStat <transName|ID>
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no transformation supplied")
            return
        transName = argss[0]
        res = self.server.getTransformationStats(transName)
        if not res['OK']:
            print("Failed to get statistics for %s: %s" % (transName, res['Message']))
        else:
            res['Value'].pop('Total', None)
            printDict(res['Value'])

    def do_modMask(self, args):
        """Modify transformation input definition
        usage: modMask <mask> <transName|ID>
        """
        argss = args.split()
        # Both the mask and at least one transformation are required
        if len(argss) < 2:
            print("no transformation supplied")
            return
        mask = argss[0]
        transNames = argss[1:]
        for transName in transNames:
            res = self.server.setTransformationParameter(transName, "FileMask", mask)
            if not res['OK']:
                print("Failed to modify input file mask for %s: %s" % (transName, res['Message']))
            else:
                print("Updated %s filemask" % transName)

    def do_getFiles(self, args):
        """Get files for the transformation (optionally with a given status)
        usage: getFiles <transName|ID> [Status] [Status]
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no transformation supplied")
            return
        transName = argss[0]
        status = argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
        else:
            selectDict = {'TransformationID': res['Value']['TransformationID']}
            if status:
                selectDict['Status'] = status
            res = self.server.getTransformationFiles(condDict=selectDict)
            if not res['OK']:
                print("Failed to get transformation files: %s" % res['Message'])
            elif res['Value']:
                self._printFormattedDictList(res['Value'],
                                             ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                             'LFN', 'LFN')
            else:
                print("No files found")

    def do_getFileStatus(self, args):
        """Get file(s) status for the given transformation
        usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
        """
        argss = args.split()
        if len(argss) < 2:
            print("transformation and file not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print("Failed to get transformation information: %s" % res['Message'])
        else:
            selectDict = {'TransformationID': res['Value']['TransformationID']}
            res = self.server.getTransformationFiles(condDict=selectDict)
            if not res['OK']:
                print("Failed to get transformation files: %s" % res['Message'])
            elif res['Value']:
                filesList = [fileDict for fileDict in res['Value'] if fileDict['LFN'] in lfns]
                if filesList:
                    self._printFormattedDictList(filesList,
                                                 ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                                 'LFN', 'LFN')
                else:
                    print("Could not find any LFN in %s for transformation %s" % (lfns, transName))
            else:
                print("No files found")

    def do_setFileStatus(self, args):
        """Set file status for the given transformation
        usage: setFileStatus <transName|ID> <lfn> <status>
        """
        argss = args.split()
        if not len(argss) == 3:
            print("transformation file and status not supplied")
            return
        transName = argss[0]
        lfn = argss[1]
        status = argss[2]
        res = self.server.setFileStatusForTransformation(transName, status, [lfn])
        if not res['OK']:
            print("Failed to update file status: %s" % res['Message'])
        else:
            print("Updated file status to %s" % status)

    def do_resetFile(self, args):
        """Reset file status for the given transformation
        usage: resetFile <transName|ID> <lfn>
        """
        argss = args.split()
        if not len(argss) > 1:
            print("transformation and file(s) not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.setFileStatusForTransformation(transName, 'Unused', lfns)
        if not res['OK']:
            print("Failed to reset file status: %s" % res['Message'])
        else:
            if res['Value']['Failed']:
                print("Could not reset some files: ")
                for lfn, reason in res['Value']['Failed'].items():
                    print("%s %s" % (lfn, reason))
            else:
                print("Updated file statuses to 'Unused' for %d file(s)" % len(lfns))

    def do_resetProcessedFile(self, args):
        """Reset file status for the given transformation, also for processed files
        usage: resetProcessedFile <transName|ID> <lfn>
        """
        argss = args.split()
        if not len(argss) > 1:
            print("transformation and file(s) not supplied")
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.setFileStatusForTransformation(transName, 'Unused', lfns, force=True)
        if not res['OK']:
            print("Failed to reset file status: %s" % res['Message'])
        else:
            if res['Value']['Failed']:
                print("Could not reset some files: ")
                for lfn, reason in res['Value']['Failed'].items():
                    print("%s %s" % (lfn, reason))
            else:
                print("Updated file statuses to 'Unused' for %d file(s)" % len(lfns))

    ####################################################################
    #
    # These are the methods for file manipulation
    #

    def do_addDirectory(self, args):
        """Add files from the given catalog directory
        usage: addDirectory <directory> [directory]
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no directory supplied")
            return
        for directory in argss:
            res = self.server.addDirectory(directory, force=True)
            if not res['OK']:
                print('failed to add directory %s: %s' % (directory, res['Message']))
            else:
                print('added %s files for %s' % (res['Value'], directory))

    def do_replicas(self, args):
        """Get replicas for <path>
        usage: replicas <lfn> [lfn]
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no files supplied")
            return
        res = self.server.getReplicas(argss)
        if not res['OK']:
            print("failed to get any replica information: %s" % res['Message'])
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print("failed to get replica information for %s: %s" % (lfn, error))
        for lfn in sortList(res['Value']['Successful'].keys()):
            ses = sortList(res['Value']['Successful'][lfn].keys())
            outStr = "%s :" % lfn.ljust(100)
            for se in ses:
                outStr = "%s %s" % (outStr, se.ljust(15))
            print(outStr)

    def do_addFile(self, args):
        """Add new files to transformation DB
        usage: addFile <lfn> [lfn]
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no files supplied")
            return
        lfnDict = {}
        for lfn in argss:
            lfnDict[lfn] = {'PFN': 'IGNORED-PFN', 'SE': 'IGNORED-SE', 'Size': 0,
                            'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM'}
        res = self.server.addFile(lfnDict, force=True)
        if not res['OK']:
            print("failed to add any files: %s" % res['Message'])
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print("failed to add %s: %s" % (lfn, error))
        for lfn in sortList(res['Value']['Successful'].keys()):
            print("added %s" % lfn)

    def do_removeFile(self, args):
        """Remove file from transformation DB
        usage: removeFile <lfn> [lfn]
        """
        argss = args.split()
        if not len(argss) > 0:
            print("no files supplied")
            return
        res = self.server.removeFile(argss)
        if not res['OK']:
            print("failed to remove any files: %s" % res['Message'])
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print("failed to remove %s: %s" % (lfn, error))
        for lfn in sortList(res['Value']['Successful'].keys()):
            print("removed %s" % lfn)

    def do_addReplica(self, args):
        """Add new replica to the transformation DB
        usage: addReplica <lfn> <se>
        """
        argss = args.split()
        if not len(argss) == 2:
            print("no file info supplied")
            return
        lfn = argss[0]
        se = argss[1]
        lfnDict = {}
        lfnDict[lfn] = {'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0,
                        'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM'}
        res = self.server.addReplica(lfnDict, force=True)
        if not res['OK']:
            print("failed to add replica: %s" % res['Message'])
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print("failed to add replica: %s" % (error))
        for lfn in sortList(res['Value']['Successful'].keys()):
            print("added %s" % lfn)

    def do_removeReplica(self, args):
        """Remove replica from the transformation DB
        usage: removeReplica <lfn> <se>
        """
        argss = args.split()
        if not len(argss) == 2:
            print("no file info supplied")
            return
        lfn = argss[0]
        se = argss[1]
        lfnDict = {}
        lfnDict[lfn] = {'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0,
                        'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM'}
        res = self.server.removeReplica(lfnDict)
        if not res['OK']:
            print("failed to remove replica: %s" % res['Message'])
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print("failed to remove replica: %s" % (error))
        for lfn in sortList(res['Value']['Successful'].keys()):
            print("removed %s" % lfn)

    def do_setReplicaStatus(self, args):
        """Set replica status, usually used to mark a replica Problematic
        usage: setReplicaStatus <lfn> <status> <se>
        """
        argss = args.split()
        if not len(argss) > 2:
            print("no file info supplied")
            return
        lfn = argss[0]
        status = argss[1]
        se = argss[2]
        lfnDict = {}
        lfnDict[lfn] = {'Status': status, 'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0,
                        'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM'}
        res = self.server.setReplicaStatus(lfnDict)
        if not res['OK']:
            print("failed to set replica status: %s" % res['Message'])
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print("failed to set replica status: %s" % (error))
        for lfn in sortList(res['Value']['Successful'].keys()):
            print("updated replica status %s" % lfn)
class TransformationAgent(AgentModule):
    """Agent that creates tasks from the 'Unused' input files of transformations.

    For each selected transformation it fetches the 'Unused' files, looks up
    their replicas, lets a transformation plug-in group them into tasks and
    registers each task through the transformation client.
    """

    def initialize(self):
        """Read the agent options and create the clients used by the agent.

        :return: S_OK()
        """
        self.pluginLocation = self.am_getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.checkCatalog = self.am_getOption('CheckCatalog', 'yes')
        # This sets the default proxy to be used to the one defined under
        # /Operations/Shifter/ProductionManager.
        # The shifterProxy option in the Configuration can be used to change
        # this default.
        self.am_setOption('shifterProxy', 'ProductionManager')
        self.transDB = TransformationClient('TransformationDB')
        self.rm = ReplicaManager()
        return S_OK()

    def execute(self):
        """Run one agent cycle over all selected transformations.

        Error results (from listing the transformations or from processing
        one of them) are logged at info level; the method still returns S_OK().

        :return: S_OK()
        """
        # Get the transformations to process
        res = self.getTransformations()
        if not res['OK']:
            gLogger.info("%s.execute: Failed to obtain transformations: %s"
                         % (AGENT_NAME, res['Message']))
            return S_OK()
        # Process the transformations
        for transDict in res['Value']:
            transID = int(transDict['TransformationID'])
            gLogger.info("%s.execute: Processing transformation %s."
                         % (AGENT_NAME, transID))
            startTime = time.time()
            # Use a separate name so the result being iterated is not rebound.
            processed = self.processTransformation(transDict)
            if not processed['OK']:
                gLogger.info("%s.execute: Failed to process transformation: %s"
                             % (AGENT_NAME, processed['Message']))
            else:
                gLogger.info("%s.execute: Processed transformation in %.1f seconds"
                             % (AGENT_NAME, time.time() - startTime))
        return S_OK()

    def getTransformations(self):
        """Obtain the transformations to be executed.

        The 'Transformation' agent option selects the scope: with the default
        value 'All' every transformation in status 'Active', 'Completing' or
        'Flush' is requested; any other value is used to request that single
        transformation.

        :return: S_OK(list of transformation dictionaries), or the failed
                 result of the transformation client call
        """
        transName = self.am_getOption('Transformation', 'All')
        if transName == 'All':
            gLogger.info("%s.getTransformations: Initializing general purpose agent."
                         % AGENT_NAME)
            res = self.transDB.getTransformations(
                {'Status': ['Active', 'Completing', 'Flush']}, extraParams=True)
            if not res['OK']:
                gLogger.error("%s.getTransformations: Failed to get transformations."
                              % AGENT_NAME, res['Message'])
                return res
            transformations = res['Value']
            gLogger.info("%s.getTransformations: Obtained %d transformations to process"
                         % (AGENT_NAME, len(transformations)))
        else:
            gLogger.info("%s.getTransformations: Initializing for transformation %s."
                         % (AGENT_NAME, transName))
            res = self.transDB.getTransformation(transName, extraParams=True)
            if not res['OK']:
                gLogger.error("%s.getTransformations: Failed to get transformation."
                              % AGENT_NAME, res['Message'])
                return res
            transformations = [res['Value']]
        return S_OK(transformations)

    def processTransformation(self, transDict):
        """Create tasks for the 'Unused' files of one transformation.

        :param transDict: transformation description; the keys read here are
                          'TransformationID', 'Status', 'Type' and, optionally,
                          'Plugin'
        :return: S_OK() once the tasks have been submitted (individual task
                 creation failures are only logged), or the failed result of
                 the step that could not be completed
        """
        transID = transDict['TransformationID']
        # First get the LFNs associated to the transformation
        res = self.transDB.getTransformationFiles(
            condDict={'TransformationID': transID, 'Status': 'Unused'})
        if not res['OK']:
            gLogger.error("%s.processTransformation: Failed to obtain input data."
                          % AGENT_NAME, res['Message'])
            return res
        transFiles = res['Value']
        lfns = res['LFNs']
        if not lfns:
            gLogger.info("%s.processTransformation: No 'Unused' files found for transformation."
                         % AGENT_NAME)
            # Nothing left to flush, so a 'Flush' transformation goes back to 'Active'
            if transDict['Status'] == 'Flush':
                self.__setActiveAfterFlush(transID)
            return S_OK()

        # Check the data is available with replicas
        activeOnly = transDict['Type'].lower() not in ["replication", "removal"]
        res = self.__getDataReplicas(transID, lfns, active=activeOnly)
        if not res['OK']:
            gLogger.error("%s.processTransformation: Failed to get data replicas"
                          % AGENT_NAME, res['Message'])
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        plugin = transDict.get('Plugin') or 'Standard'
        gLogger.info("%s.processTransformation: Processing transformation with '%s' plug-in."
                     % (AGENT_NAME, plugin))
        res = self.__generatePluginObject(plugin)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.generateTasks()
        if not res['OK']:
            gLogger.error("%s.processTransformation: Failed to generate tasks for transformation."
                          % AGENT_NAME, res['Message'])
            return res
        tasks = res['Value']

        # Create the tasks
        allCreated = True
        created = 0
        for se, taskLfns in tasks:
            res = self.transDB.addTaskForTransformation(transID, taskLfns, se)
            if not res['OK']:
                gLogger.error("%s.processTransformation: Failed to add task generated by plug-in."
                              % AGENT_NAME, res['Message'])
                allCreated = False
            else:
                created += 1
        if created:
            gLogger.info("%s.processTransformation: Successfully created %d tasks for transformation."
                         % (AGENT_NAME, created))

        # If this production is to Flush, reset it only when every task was created
        if transDict['Status'] == 'Flush' and allCreated:
            self.__setActiveAfterFlush(transID)
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def __setActiveAfterFlush(self, transID):
        """Set the 'Status' parameter of a flushed transformation back to 'Active'.

        The outcome is only logged; nothing is returned to the caller.
        """
        res = self.transDB.setTransformationParameter(transID, 'Status', 'Active')
        if not res['OK']:
            gLogger.error("%s.execute: Failed to update transformation status to 'Active'."
                          % AGENT_NAME, res['Message'])
        else:
            gLogger.info("%s.execute: Updated transformation status to 'Active'."
                         % AGENT_NAME)

    def __generatePluginObject(self, plugin):
        """Instantiate the TransformationPlugin class with the given plug-in name.

        The class is looked up in the module named by self.pluginLocation.

        :param plugin: plug-in name handed to the TransformationPlugin constructor
        :return: S_OK(plug-in object), or S_ERROR(message) if the module cannot
                 be imported or the object cannot be created
        """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(),
                                    ['TransformationPlugin'])
        except Exception as x:
            gLogger.exception("%s.__generatePluginObject: Failed to import 'TransformationPlugin'"
                              % AGENT_NAME, '', x)
            return S_ERROR("Failed to import 'TransformationPlugin' from %s: %s"
                           % (self.pluginLocation, x))
        try:
            # Call the constructor directly rather than eval()-ing a string
            # built from the plug-in name, which broke on names containing
            # quotes and would execute anything embedded in the name.
            return S_OK(plugModule.TransformationPlugin(plugin))
        except Exception as x:
            gLogger.exception("%s.__generatePluginObject: Failed to create %s()."
                              % (AGENT_NAME, plugin), '', x)
            return S_ERROR("Failed to create %s() plug-in: %s" % (plugin, x))