def _extend(): """Extends all the tasks""" clip = _Params() clip.registerSwitches() Script.parseCommandLine() from DIRAC import gLogger, exit as dexit if not clip.prod or not clip.tasks: gLogger.error("Production ID is 0 or Tasks is 0, cannot be") dexit(1) from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient tc = TransformationClient() res = tc.getTransformation(clip.prod) trans = res['Value'] transp = trans['Plugin'] if transp != 'Limited': gLogger.error( "This cannot be used on productions that are not using the 'Limited' plugin" ) dexit(0) gLogger.info("Prod %s has %s tasks registered" % (clip.prod, trans['MaxNumberOfTasks'])) if clip.tasks > 0: max_tasks = trans['MaxNumberOfTasks'] + clip.tasks groupsize = trans['GroupSize'] gLogger.notice("Adding %s tasks (%s file(s)) to production %s" % (clip.tasks, clip.tasks * groupsize, clip.prod)) elif clip.tasks < 0: max_tasks = -1 gLogger.notice( "Now all existing files in the DB for production %s will be processed." % clip.prod) else: gLogger.error("Number of tasks must be different from 0") dexit(1) res = tc.setTransformationParameter(clip.prod, 'MaxNumberOfTasks', max_tasks) if not res['OK']: gLogger.error(res['Message']) dexit(1) gLogger.notice("Production %s extended!" % clip.prod) dexit(0)
def _extend(): """Extends all the tasks""" clip = _Params() clip.registerSwitches() Script.parseCommandLine() from DIRAC import gLogger, exit as dexit if not clip.prod or not clip.tasks: gLogger.error("Production ID is 0 or Tasks is 0, cannot be") dexit(1) from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient tc = TransformationClient() res = tc.getTransformation(clip.prod) trans= res['Value'] transp = trans['Plugin'] if transp != 'Limited': gLogger.error("This cannot be used on productions that are not using the 'Limited' plugin") dexit(0) gLogger.info("Prod %s has %s tasks registered" % (clip.prod, trans['MaxNumberOfTasks']) ) if clip.tasks >0: max_tasks = trans['MaxNumberOfTasks'] + clip.tasks groupsize = trans['GroupSize'] gLogger.notice("Adding %s tasks (%s file(s)) to production %s" %(clip.tasks, clip.tasks*groupsize, clip.prod)) elif clip.tasks <0: max_tasks = -1 gLogger.notice("Now all existing files in the DB for production %s will be processed." % clip.prod) else: gLogger.error("Number of tasks must be different from 0") dexit(1) res = tc.setTransformationParameter(clip.prod, 'MaxNumberOfTasks', max_tasks) if not res['OK']: gLogger.error(res['Message']) dexit(1) gLogger.notice("Production %s extended!" % clip.prod) dexit(0)
class ValidateOutputDataAgent(AgentModule): def __init__(self, *args, **kwargs): """ c'tor """ AgentModule.__init__(self, *args, **kwargs) self.consistencyInspector = ConsistencyInspector() self.integrityClient = DataIntegrityClient() self.fc = FileCatalog() self.transClient = TransformationClient() self.fileCatalogClient = FileCatalogClient() agentTSTypes = self.am_getOption('TransformationTypes', []) if agentTSTypes: self.transformationTypes = agentTSTypes else: self.transformationTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge']) self.directoryLocations = sorted( self.am_getOption('DirectoryLocations', ['TransformationDB', 'MetadataCatalog'])) self.transfidmeta = self.am_getOption('TransfIDMeta', "TransformationID") self.enableFlag = True ############################################################################# def initialize(self): """ Sets defaults """ # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption('shifterProxy', 'DataManager') gLogger.info("Will treat the following transformation types: %s" % str(self.transformationTypes)) gLogger.info( "Will search for directories in the following locations: %s" % str(self.directoryLocations)) gLogger.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta) return S_OK() ############################################################################# def execute(self): """ The VerifyOutputData execution method """ self.enableFlag = self.am_getOption('EnableFlag', 'True') if not self.enableFlag == 'True': self.log.info( "VerifyOutputData is disabled by configuration option 'EnableFlag'" ) return S_OK('Disabled via CS flag') gLogger.info("-" * 40) self.updateWaitingIntegrity() gLogger.info("-" * 40) res = self.transClient.getTransformations({ 'Status': 'ValidatingOutput', 'Type': self.transformationTypes }) if not res['OK']: gLogger.error("Failed to get ValidatingOutput transformations", res['Message']) return res transDicts = res['Value'] if not transDicts: gLogger.info("No transformations found in ValidatingOutput status") return S_OK() gLogger.info("Found %s transformations in ValidatingOutput status" % len(transDicts)) for transDict in transDicts: transID = transDict['TransformationID'] res = self.checkTransformationIntegrity(int(transID)) if not res['OK']: gLogger.error( "Failed to perform full integrity check for transformation %d" % transID) else: self.finalizeCheck(transID) gLogger.info("-" * 40) return S_OK() def updateWaitingIntegrity(self): """ Get 'WaitingIntegrity' transformations, update to 'ValidatedOutput' """ gLogger.info( "Looking for transformations in the WaitingIntegrity status to update" ) res = self.transClient.getTransformations( {'Status': 'WaitingIntegrity'}) if not res['OK']: gLogger.error("Failed to get WaitingIntegrity transformations", res['Message']) return res transDicts = res['Value'] if not transDicts: gLogger.info("No transformations found in WaitingIntegrity status") return S_OK() gLogger.info("Found %s transformations in WaitingIntegrity status" % len(transDicts)) for transDict in transDicts: transID = transDict['TransformationID'] gLogger.info("-" * 40) res = self.integrityClient.getTransformationProblematics( int(transID)) if not res['OK']: gLogger.error( "Failed to determine waiting problematics for transformation", res['Message']) elif not res['Value']: res = 
self.transClient.setTransformationParameter(transID, 'Status', 'ValidatedOutput')
        if not res['OK']:
          gLogger.error("Failed to update status of transformation %s to ValidatedOutput" % transID)
        else:
          gLogger.info("Updated status of transformation %s to ValidatedOutput" % transID)
      else:
        gLogger.info("%d problematic files for transformation %s were found" % (len(res['Value']), transID))
    return

  #############################################################################
  #
  # Get the transformation directories for checking
  #
  def getTransformationDirectories(self, transID):
    """ Get the directories for the supplied transformation from the transformation system """
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters(transID, ['OutputDirectories'])
      if not res['OK']:
        gLogger.error("Failed to obtain transformation directories", res['Message'])
        return res
      if not isinstance(res['Value'], list):
        transDirectories = ast.literal_eval(res['Value'])
      else:
        transDirectories = res['Value']
      directories = self._addDirs(transID, transDirectories, directories)

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.fileCatalogClient.findDirectoriesByMetadata({self.transfidmeta: transID})
      if not res['OK']:
        gLogger.error("Failed to obtain metadata catalog directories", res['Message'])
        return res
      transDirectories = res['Value']
      directories = self._addDirs(transID, transDirectories, directories)

    if not directories:
      gLogger.info("No output directories found")
    directories = sorted(directories)
    return S_OK(directories)

  @staticmethod
  def _addDirs(transID, newDirs, existingDirs):
    for nDir in newDirs:
      transStr = str(transID).zfill(8)
      if re.search(transStr, nDir):
        if nDir not in existingDirs:
          existingDirs.append(nDir)
    return existingDirs

  #############################################################################
  def checkTransformationIntegrity(self, transID):
    """ This method contains the real work """
    gLogger.info("-" * 40)
    gLogger.info("Checking the integrity of transformation %s" % transID)
    gLogger.info("-" * 40)
    res = self.getTransformationDirectories(transID)
    if not res['OK']:
      return res
    directories = res['Value']
    if not directories:
      return S_OK()

    ######################################################
    #
    # This check performs Catalog->SE for possible output directories
    #
    res = self.fc.exists(directories)
    if not res['OK']:
      gLogger.error('Failed to check directory existence', res['Message'])
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error('Failed to determine existence of directory', '%s %s' % (directory, error))
    if res['Value']['Failed']:
      return S_ERROR("Failed to determine the existence of directories")
    directoryExists = res['Value']['Successful']
    for directory in sorted(directoryExists.keys()):
      if not directoryExists[directory]:
        continue
      iRes = self.consistencyInspector.catalogDirectoryToSE(directory)
      if not iRes['OK']:
        gLogger.error(iRes['Message'])
        return iRes
    gLogger.info("-" * 40)
    gLogger.info("Completed integrity check for transformation %s" % transID)
    return S_OK()

  def finalizeCheck(self, transID):
    """ Move to 'WaitingIntegrity' or 'ValidatedOutput' """
    res = self.integrityClient.getTransformationProblematics(int(transID))
    if not res['OK']:
      gLogger.error("Failed to determine whether there were associated problematic files", res['Message'])
      newStatus = ''
    elif res['Value']:
      gLogger.info("%d problematic files for transformation %s were found" % (len(res['Value']), transID))
      newStatus = "WaitingIntegrity"
    else:
      gLogger.info("No problematics were found for transformation %s" % transID)
      newStatus = "ValidatedOutput"
    if newStatus:
      res = self.transClient.setTransformationParameter(transID, 'Status', newStatus)
      if not res['OK']:
        gLogger.error("Failed to update status of transformation %s to %s" % (transID, newStatus))
      else:
        gLogger.info("Updated status of transformation %s to %s" % (transID, newStatus))
    gLogger.info("-" * 40)
    return S_OK()
class TransformationCleaningAgent(AgentModule): """ .. class:: TransformationCleaningAgent :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance :param ~TransformationClient.TransformationClient transClient: TransformationClient instance :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance """ def __init__(self, *args, **kwargs): """ c'tor """ AgentModule.__init__(self, *args, **kwargs) self.shifterProxy = None # # transformation client self.transClient = None # # wms client self.wmsClient = None # # request client self.reqClient = None # # file catalog client self.metadataClient = None # # transformations types self.transformationTypes = None # # directory locations self.directoryLocations = ['TransformationDB', 'MetadataCatalog'] # # transformation metadata self.transfidmeta = 'TransformationID' # # archive periof in days self.archiveAfter = 7 # # transformation log SEs self.logSE = 'LogSE' # # enable/disable execution self.enableFlag = 'True' self.dataProcTTypes = ['MCSimulation', 'Merge'] self.dataManipTTypes = ['Replication', 'Removal'] def initialize(self): """ agent initialisation reading and setting confing opts :param self: self reference """ # # shifter proxy # See cleanContent method: this proxy will be used ALSO when the file catalog used # is the DIRAC File Catalog (DFC). # This is possible because of unset of the "UseServerCertificate" option self.shifterProxy = self.am_getOption('shifterProxy', self.shifterProxy) # # transformations types self.dataProcTTypes = Operations().getValue( 'Transformations/DataProcessing', self.dataProcTTypes) self.dataManipTTypes = Operations().getValue( 'Transformations/DataManipulation', self.dataManipTTypes) agentTSTypes = self.am_getOption('TransformationTypes', []) if agentTSTypes: self.transformationTypes = sorted(agentTSTypes) else: self.transformationTypes = sorted(self.dataProcTTypes + self.dataManipTTypes) self.log.info("Will consider the following transformation types: %s" % str(self.transformationTypes)) # # directory locations self.directoryLocations = sorted( self.am_getOption('DirectoryLocations', self.directoryLocations)) self.log.info( "Will search for directories in the following locations: %s" % str(self.directoryLocations)) # # transformation metadata self.transfidmeta = self.am_getOption('TransfIDMeta', self.transfidmeta) self.log.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta) # # archive periof in days self.archiveAfter = self.am_getOption('ArchiveAfter', self.archiveAfter) # days self.log.info("Will archive Completed transformations after %d days" % self.archiveAfter) # # transformation log SEs self.logSE = Operations().getValue('/LogStorage/LogSE', self.logSE) self.log.info("Will remove logs found on storage element: %s" % self.logSE) # # transformation client self.transClient = TransformationClient() # # wms client self.wmsClient = WMSClient() # # request client self.reqClient = ReqClient() # # file catalog client self.metadataClient = FileCatalogClient() return S_OK() ############################################################################# def execute(self): """ execution in one agent's cycle :param self: self reference """ self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag) if self.enableFlag != 'True': self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' ) return S_OK('Disabled via CS flag') # Obtain the transformations in Cleaning status and remove any 
mention of the jobs/files res = self.transClient.getTransformations({ 'Status': 'Cleaning', 'Type': self.transformationTypes }) if res['OK']: for transDict in res['Value']: if self.shifterProxy: self._executeClean(transDict) else: self.log.info( "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeClean)( transDict, proxyUserDN=transDict['AuthorDN'], proxyUserGroup=transDict['AuthorGroup']) else: self.log.error("Failed to get transformations", res['Message']) # Obtain the transformations in RemovingFiles status and removes the output files res = self.transClient.getTransformations({ 'Status': 'RemovingFiles', 'Type': self.transformationTypes }) if res['OK']: for transDict in res['Value']: if self.shifterProxy: self._executeRemoval(transDict) else: self.log.info( "Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeRemoval)( transDict, proxyUserDN=transDict['AuthorDN'], proxyUserGroup=transDict['AuthorGroup']) else: self.log.error("Could not get the transformations", res['Message']) # Obtain the transformations in Completed status and archive if inactive for X days olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter) res = self.transClient.getTransformations( { 'Status': 'Completed', 'Type': self.transformationTypes }, older=olderThanTime, timeStamp='LastUpdate') if res['OK']: for transDict in res['Value']: if self.shifterProxy: self._executeArchive(transDict) else: self.log.info( "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeArchive)( transDict, proxyUserDN=transDict['AuthorDN'], proxyUserGroup=transDict['AuthorGroup']) else: self.log.error("Could not get the transformations", res['Message']) return S_OK() def _executeClean(self, transDict): """Clean transformation.""" # if transformation is of type `Replication` or `Removal`, there is nothing to clean. # We just archive if transDict['Type'] in self.dataManipTTypes: res = self.archiveTransformation(transDict['TransformationID']) if not res['OK']: self.log.error("Problems archiving transformation %s: %s" % (transDict['TransformationID'], res['Message'])) else: res = self.cleanTransformation(transDict['TransformationID']) if not res['OK']: self.log.error("Problems cleaning transformation %s: %s" % (transDict['TransformationID'], res['Message'])) def _executeRemoval(self, transDict): """Remove files from given transformation.""" res = self.removeTransformationOutput(transDict['TransformationID']) if not res['OK']: self.log.error("Problems removing transformation %s: %s" % (transDict['TransformationID'], res['Message'])) def _executeArchive(self, transDict): """Archive the given transformation.""" res = self.archiveTransformation(transDict['TransformationID']) if not res['OK']: self.log.error("Problems archiving transformation %s: %s" % (transDict['TransformationID'], res['Message'])) return S_OK() ############################################################################# # # Get the transformation directories for checking # def getTransformationDirectories(self, transID): """ get the directories for the supplied transformation from the transformation system. These directories are used by removeTransformationOutput and cleanTransformation for removing output. 
:param self: self reference :param int transID: transformation ID """ self.log.verbose( "Cleaning Transformation directories of transformation %d" % transID) directories = [] if 'TransformationDB' in self.directoryLocations: res = self.transClient.getTransformationParameters( transID, ['OutputDirectories']) if not res['OK']: self.log.error("Failed to obtain transformation directories", res['Message']) return res transDirectories = [] if res['Value']: if not isinstance(res['Value'], list): try: transDirectories = ast.literal_eval(res['Value']) except BaseException: # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]' transDirectories.append(res['Value']) else: transDirectories = res['Value'] directories = self._addDirs(transID, transDirectories, directories) if 'MetadataCatalog' in self.directoryLocations: res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta: transID}) if not res['OK']: self.log.error("Failed to obtain metadata catalog directories", res['Message']) return res transDirectories = res['Value'] directories = self._addDirs(transID, transDirectories, directories) if not directories: self.log.info("No output directories found") directories = sorted(directories) return S_OK(directories) @classmethod def _addDirs(cls, transID, newDirs, existingDirs): """ append unique :newDirs: list to :existingDirs: list :param self: self reference :param int transID: transformationID :param list newDirs: src list of paths :param list existingDirs: dest list of paths """ for folder in newDirs: transStr = str(transID).zfill(8) if re.search(transStr, str(folder)): if folder not in existingDirs: existingDirs.append(os.path.normpath(folder)) return existingDirs ############################################################################# # # These are the methods for performing the cleaning of catalogs and storage # def cleanContent(self, directory): """ wipe out everything from catalog under folder :directory: :param self: self reference :params str directory: folder name """ self.log.verbose("Cleaning Catalog contents") res = self.__getCatalogDirectoryContents([directory]) if not res['OK']: return res filesFound = res['Value'] if not filesFound: self.log.info( "No files are registered in the catalog directory %s" % directory) return S_OK() self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len(filesFound)) # Executing with shifter proxy gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false') res = DataManager().removeFile(filesFound, force=True) gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true') if not res['OK']: return res realFailure = False for lfn, reason in res['Value']['Failed'].items(): if "File does not exist" in str(reason): self.log.warn("File %s not found in some catalog: " % (lfn)) else: self.log.error("Failed to remove file found in the catalog", "%s %s" % (lfn, reason)) realFailure = True if realFailure: return S_ERROR("Failed to remove all files found in the catalog") return S_OK() def __getCatalogDirectoryContents(self, directories): """ get catalog contents under paths :directories: :param self: self reference :param list directories: list of paths in catalog """ self.log.info('Obtaining the catalog contents for %d directories:' % len(directories)) for directory in directories: self.log.info(directory) activeDirs = directories allFiles = {} fc = FileCatalog() while activeDirs: currentDir = activeDirs[0] res = 
returnSingleResult(fc.listDirectory(currentDir)) activeDirs.remove(currentDir) if not res['OK'] and 'Directory does not exist' in res[ 'Message']: # FIXME: DFC should return errno self.log.info("The supplied directory %s does not exist" % currentDir) elif not res['OK']: if "No such file or directory" in res['Message']: self.log.info("%s: %s" % (currentDir, res['Message'])) else: self.log.error("Failed to get directory %s content: %s" % (currentDir, res['Message'])) else: dirContents = res['Value'] activeDirs.extend(dirContents['SubDirs']) allFiles.update(dirContents['Files']) self.log.info("Found %d files" % len(allFiles)) return S_OK(allFiles.keys()) def cleanTransformationLogFiles(self, directory): """ clean up transformation logs from directory :directory: :param self: self reference :param str directory: folder name """ self.log.verbose("Removing log files found in the directory %s" % directory) res = returnSingleResult( StorageElement(self.logSE).removeDirectory(directory, recursive=True)) if not res['OK']: if cmpError(res, errno.ENOENT): # No such file or directory self.log.warn("Transformation log directory does not exist", directory) return S_OK() self.log.error("Failed to remove log files", res['Message']) return res self.log.info("Successfully removed transformation log directory") return S_OK() ############################################################################# # # These are the functional methods for archiving and cleaning transformations # def removeTransformationOutput(self, transID): """ This just removes any mention of the output data from the catalog and storage """ self.log.info("Removing output data for transformation %s" % transID) res = self.getTransformationDirectories(transID) if not res['OK']: self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % (transID, res)) return S_OK() directories = res['Value'] for directory in directories: if not re.search('/LOG/', directory): res = self.cleanContent(directory) if not res['OK']: return res self.log.info("Removed %d directories from the catalog \ and its files from the storage for transformation %s" % (len(directories), transID)) # Clean ALL the possible remnants found in the metadata catalog res = self.cleanMetadataCatalogFiles(transID) if not res['OK']: return res self.log.info("Successfully removed output of transformation %d" % transID) # Change the status of the transformation to RemovedFiles res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles') if not res['OK']: self.log.error( "Failed to update status of transformation %s to RemovedFiles" % (transID), res['Message']) return res self.log.info("Updated status of transformation %s to RemovedFiles" % (transID)) return S_OK() def archiveTransformation(self, transID): """ This just removes job from the jobDB and the transformation DB :param self: self reference :param int transID: transformation ID """ self.log.info("Archiving transformation %s" % transID) # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks(transID) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation(transID) if not res['OK']: return res self.log.info("Successfully archived transformation %d" % transID) # Change the status of the transformation to archived res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived') if not res['OK']: self.log.error( "Failed to update status of 
transformation %s to Archived" % (transID), res['Message']) return res self.log.info("Updated status of transformation %s to Archived" % (transID)) return S_OK() def cleanTransformation(self, transID): """ This removes what was produced by the supplied transformation, leaving only some info and log in the transformation DB. """ self.log.info("Cleaning transformation %s" % transID) res = self.getTransformationDirectories(transID) if not res['OK']: self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % (transID, res)) return S_OK() directories = res['Value'] # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks(transID) if not res['OK']: return res # Clean the log files for the jobs for directory in directories: if re.search('/LOG/', directory): res = self.cleanTransformationLogFiles(directory) if not res['OK']: return res res = self.cleanContent(directory) if not res['OK']: return res # Clean ALL the possible remnants found res = self.cleanMetadataCatalogFiles(transID) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation(transID) if not res['OK']: return res self.log.info("Successfully cleaned transformation %d" % transID) res = self.transClient.setTransformationParameter( transID, 'Status', 'Cleaned') if not res['OK']: self.log.error( "Failed to update status of transformation %s to Cleaned" % (transID), res['Message']) return res self.log.info("Updated status of transformation %s to Cleaned" % (transID)) return S_OK() def cleanMetadataCatalogFiles(self, transID): """ wipe out files from catalog """ res = self.metadataClient.findFilesByMetadata( {self.transfidmeta: transID}) if not res['OK']: return res fileToRemove = res['Value'] if not fileToRemove: self.log.info('No files found for transID %s' % transID) return S_OK() # Executing with shifter proxy gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false') res = DataManager().removeFile(fileToRemove, force=True) gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true') if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): self.log.error("Failed to remove file found in metadata catalog", "%s %s" % (lfn, reason)) if res['Value']['Failed']: return S_ERROR( "Failed to remove all files found in the metadata catalog") self.log.info("Successfully removed all files found in the BK") return S_OK() ############################################################################# # # These are the methods for removing the jobs from the WMS and transformation DB # def cleanTransformationTasks(self, transID): """ clean tasks from WMS, or from the RMS if it is a DataManipulation transformation """ self.log.verbose("Cleaning Transformation tasks of transformation %d" % transID) res = self.__getTransformationExternalIDs(transID) if not res['OK']: return res externalIDs = res['Value'] if externalIDs: res = self.transClient.getTransformationParameters( transID, ['Type']) if not res['OK']: self.log.error("Failed to determine transformation type") return res transType = res['Value'] if transType in self.dataProcTTypes: res = self.__removeWMSTasks(externalIDs) else: res = self.__removeRequests(externalIDs) if not res['OK']: return res return S_OK() def __getTransformationExternalIDs(self, transID): """ collect all ExternalIDs for transformation :transID: :param self: self reference :param int transID: transforamtion ID """ res = 
self.transClient.getTransformationTasks( condDict={'TransformationID': transID}) if not res['OK']: self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message']) return res externalIDs = [taskDict['ExternalID'] for taskDict in res["Value"]] self.log.info("Found %d tasks for transformation" % len(externalIDs)) return S_OK(externalIDs) def __removeRequests(self, requestIDs): """ This will remove requests from the RMS system - """ rIDs = [int(long(j)) for j in requestIDs if long(j)] for reqID in rIDs: self.reqClient.cancelRequest(reqID) return S_OK() def __removeWMSTasks(self, transJobIDs): """ wipe out jobs and their requests from the system :param self: self reference :param list trasnJobIDs: job IDs """ # Prevent 0 job IDs jobIDs = [int(j) for j in transJobIDs if int(j)] allRemove = True for jobList in breakListIntoChunks(jobIDs, 500): res = self.wmsClient.killJob(jobList) if res['OK']: self.log.info("Successfully killed %d jobs from WMS" % len(jobList)) elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs" not in res) and ("FailedJobIDs" not in res): self.log.info("Found %s jobs which did not exist in the WMS" % len(res['InvalidJobIDs'])) elif "NonauthorizedJobIDs" in res: self.log.error( "Failed to kill %s jobs because not authorized" % len(res['NonauthorizedJobIDs'])) allRemove = False elif "FailedJobIDs" in res: self.log.error("Failed to kill %s jobs" % len(res['FailedJobIDs'])) allRemove = False res = self.wmsClient.deleteJob(jobList) if res['OK']: self.log.info("Successfully removed %d jobs from WMS" % len(jobList)) elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs" not in res) and ("FailedJobIDs" not in res): self.log.info("Found %s jobs which did not exist in the WMS" % len(res['InvalidJobIDs'])) elif "NonauthorizedJobIDs" in res: self.log.error( "Failed to remove %s jobs because not authorized" % len(res['NonauthorizedJobIDs'])) allRemove = False elif "FailedJobIDs" in res: self.log.error("Failed to remove %s jobs" % len(res['FailedJobIDs'])) allRemove = False if not allRemove: return S_ERROR("Failed to remove all remnants from WMS") self.log.info("Successfully removed all tasks from the WMS") if not jobIDs: self.log.info( "JobIDs not present, unable to remove asociated requests.") return S_OK() failed = 0 failoverRequests = {} res = self.reqClient.getRequestIDsForJobs(jobIDs) if not res['OK']: self.log.error("Failed to get requestID for jobs.", res['Message']) return res failoverRequests.update(res['Value']['Successful']) if not failoverRequests: return S_OK() for jobID, requestID in res['Value']['Successful'].items(): # Put this check just in case, tasks must have associated jobs if jobID == 0 or jobID == '0': continue res = self.reqClient.cancelRequest(requestID) if not res['OK']: self.log.error("Failed to remove request from RequestDB", res['Message']) failed += 1 else: self.log.verbose("Removed request %s associated to job %d." % (requestID, jobID)) if failed: self.log.info("Successfully removed %s requests" % (len(failoverRequests) - failed)) self.log.info("Failed to remove %s requests" % failed) return S_ERROR("Failed to remove all the request from RequestDB") self.log.info( "Successfully removed all the associated failover requests") return S_OK()
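# --- Illustrative sketch (not part of the original module) -------------------
# The cleaning methods above (cleanContent, cleanMetadataCatalogFiles) all
# consume DIRAC's bulk-operation result convention,
# S_OK({'Successful': {...}, 'Failed': {lfn: reason}}). This standalone,
# hedged sketch shows that handling pattern with a fabricated result dict;
# it makes no DIRAC calls and the LFNs are placeholders.
def summarise_removal(res):
  """Return (allRemoved, realFailures), ignoring already-missing files."""
  if not res['OK']:
    return False, res['Message']
  realFailures = []
  for lfn, reason in res['Value']['Failed'].items():
    if "File does not exist" in str(reason):
      continue  # already gone: treated as a warning in the agent, not a hard failure
    realFailures.append((lfn, reason))
  return not realFailures, realFailures


if __name__ == '__main__':
  fakeRes = {'OK': True,
             'Value': {'Successful': {'/vo/prod/file1': True},
                       'Failed': {'/vo/prod/file2': 'File does not exist'}}}
  print(summarise_removal(fakeRes))  # -> (True, [])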
class ValidateOutputDataAgent( AgentModule ): def __init__( self, *args, **kwargs ): """ c'tor """ AgentModule.__init__( self, *args, **kwargs ) self.integrityClient = DataIntegrityClient() self.fc = FileCatalog() self.transClient = TransformationClient() self.fileCatalogClient = FileCatalogClient() agentTSTypes = self.am_getOption( 'TransformationTypes', [] ) if agentTSTypes: self.transformationTypes = agentTSTypes else: self.transformationTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] ) self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', ['TransformationDB', 'MetadataCatalog'] ) ) self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) ) self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" ) self.enableFlag = True ############################################################################# def initialize( self ): """ Sets defaults """ # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption( 'shifterProxy', 'DataManager' ) gLogger.info( "Will treat the following transformation types: %s" % str( self.transformationTypes ) ) gLogger.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) ) gLogger.info( "Will check the following storage elements: %s" % str( self.activeStorages ) ) gLogger.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta ) return S_OK() ############################################################################# def execute( self ): """ The VerifyOutputData execution method """ self.enableFlag = self.am_getOption( 'EnableFlag', 'True' ) if not self.enableFlag == 'True': self.log.info( "VerifyOutputData is disabled by configuration option 'EnableFlag'" ) return S_OK( 'Disabled via CS flag' ) gLogger.info( "-" * 40 ) self.updateWaitingIntegrity() gLogger.info( "-" * 40 ) res = self.transClient.getTransformations( {'Status':'ValidatingOutput', 'Type':self.transformationTypes} ) if not res['OK']: gLogger.error( "Failed to get ValidatingOutput transformations", res['Message'] ) return res transDicts = res['Value'] if not transDicts: gLogger.info( "No transformations found in ValidatingOutput status" ) return S_OK() gLogger.info( "Found %s transformations in ValidatingOutput status" % len( transDicts ) ) for transDict in transDicts: transID = transDict['TransformationID'] res = self.checkTransformationIntegrity( int( transID ) ) if not res['OK']: gLogger.error( "Failed to perform full integrity check for transformation %d" % transID ) else: self.finalizeCheck( transID ) gLogger.info( "-" * 40 ) return S_OK() def updateWaitingIntegrity( self ): """ Get 'WaitingIntegrity' transformations, update to 'ValidatedOutput' """ gLogger.info( "Looking for transformations in the WaitingIntegrity status to update" ) res = self.transClient.getTransformations( {'Status':'WaitingIntegrity'} ) if not res['OK']: gLogger.error( "Failed to get WaitingIntegrity transformations", res['Message'] ) return res transDicts = res['Value'] if not transDicts: gLogger.info( "No transformations found in WaitingIntegrity status" ) return S_OK() gLogger.info( "Found %s transformations in WaitingIntegrity status" % len( transDicts ) ) for transDict in transDicts: transID = transDict['TransformationID'] gLogger.info( "-" * 40 ) res = self.integrityClient.getTransformationProblematics( int( transID ) ) if 
not res['OK']: gLogger.error( "Failed to determine waiting problematics for transformation", res['Message'] ) elif not res['Value']: res = self.transClient.setTransformationParameter( transID, 'Status', 'ValidatedOutput' ) if not res['OK']: gLogger.error( "Failed to update status of transformation %s to ValidatedOutput" % ( transID ) ) else: gLogger.info( "Updated status of transformation %s to ValidatedOutput" % ( transID ) ) else: gLogger.info( "%d problematic files for transformation %s were found" % ( len( res['Value'] ), transID ) ) return ############################################################################# # # Get the transformation directories for checking # def getTransformationDirectories( self, transID ): """ Get the directories for the supplied transformation from the transformation system """ directories = [] if 'TransformationDB' in self.directoryLocations: res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] ) if not res['OK']: gLogger.error( "Failed to obtain transformation directories", res['Message'] ) return res transDirectories = res['Value'].splitlines() directories = self._addDirs( transID, transDirectories, directories ) if 'MetadataCatalog' in self.directoryLocations: res = self.fileCatalogClient.findDirectoriesByMetadata( {self.transfidmeta:transID} ) if not res['OK']: gLogger.error( "Failed to obtain metadata catalog directories", res['Message'] ) return res transDirectories = res['Value'] directories = self._addDirs( transID, transDirectories, directories ) if not directories: gLogger.info( "No output directories found" ) directories = sorted( directories ) return S_OK( directories ) @staticmethod def _addDirs( transID, newDirs, existingDirs ): for nDir in newDirs: transStr = str( transID ).zfill( 8 ) if re.search( transStr, nDir ): if not nDir in existingDirs: existingDirs.append( nDir ) return existingDirs ############################################################################# def checkTransformationIntegrity( self, transID ): """ This method contains the real work """ gLogger.info( "-" * 40 ) gLogger.info( "Checking the integrity of transformation %s" % transID ) gLogger.info( "-" * 40 ) res = self.getTransformationDirectories( transID ) if not res['OK']: return res directories = res['Value'] if not directories: return S_OK() ###################################################### # # This check performs Catalog->SE for possible output directories # res = self.fc.exists( directories ) if not res['OK']: gLogger.error( res['Message'] ) return res for directory, error in res['Value']['Failed']: gLogger.error( 'Failed to determine existance of directory', '%s %s' % ( directory, error ) ) if res['Value']['Failed']: return S_ERROR( "Failed to determine the existance of directories" ) directoryExists = res['Value']['Successful'] for directory in sorted( directoryExists.keys() ): if not directoryExists[directory]: continue iRes = self.integrityClient.catalogDirectoryToSE( directory ) if not iRes['OK']: gLogger.error( iRes['Message'] ) return iRes ###################################################### # # This check performs SE->Catalog for possible output directories # for storageElementName in sorted( self.activeStorages ): res = self.integrityClient.storageDirectoryToCatalog( directories, storageElementName ) if not res['OK']: gLogger.error( res['Message'] ) return res gLogger.info( "-" * 40 ) gLogger.info( "Completed integrity check for transformation %s" % transID ) return S_OK() def finalizeCheck( self, transID ): """ 
Move to 'WaitingIntegrity' or 'ValidatedOutput' """ res = self.integrityClient.getTransformationProblematics( int( transID ) ) if not res['OK']: gLogger.error( "Failed to determine whether there were associated problematic files", res['Message'] ) newStatus = '' elif res['Value']: gLogger.info( "%d problematic files for transformation %s were found" % ( len( res['Value'] ), transID ) ) newStatus = "WaitingIntegrity" else: gLogger.info( "No problematics were found for transformation %s" % transID ) newStatus = "ValidatedOutput" if newStatus: res = self.transClient.setTransformationParameter( transID, 'Status', newStatus ) if not res['OK']: gLogger.error( "Failed to update status of transformation %s to %s" % ( transID, newStatus ) ) else: gLogger.info( "Updated status of transformation %s to %s" % ( transID, newStatus ) ) gLogger.info( "-" * 40 ) return S_OK()
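# --- Illustrative sketch (not part of the original module) -------------------
# Pure-Python summary of the status flow that finalizeCheck() and
# updateWaitingIntegrity() above implement: 'ValidatingOutput' moves to
# 'WaitingIntegrity' while problematic files remain, and to 'ValidatedOutput'
# once there are none. This is an assumption-level sketch with no DIRAC calls.
def next_status(current, nProblematic):
  if current == 'ValidatingOutput':
    return 'WaitingIntegrity' if nProblematic else 'ValidatedOutput'
  if current == 'WaitingIntegrity' and not nProblematic:
    return 'ValidatedOutput'
  return current

# Example: next_status('ValidatingOutput', 3) -> 'WaitingIntegrity'
#          next_status('WaitingIntegrity', 0) -> 'ValidatedOutput'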
class TransformationCLI(CLI, API): def __init__(self): self.server = TransformationClient() self.indentSpace = 4 CLI.__init__(self) API.__init__(self) def printPair(self, key, value, separator=":"): valueList = value.split("\n") print "%s%s%s %s" % (key, " " * (self.indentSpace - len(key)), separator, valueList[0].strip()) for valueLine in valueList[1:-1]: print "%s %s" % (" " * self.indentSpace, valueLine.strip()) def do_help(self, args): """ Default version of the help command Usage: help <command> OR use helpall to see description for all commands""" CLI.do_help(self, args) # overriting default help command def do_helpall(self, args): """ Shows help information Usage: helpall <command> If no command is specified all commands are shown """ if len(args) == 0: print "\nAvailable commands:\n" attrList = dir(self) attrList.sort() for attribute in attrList: if attribute.find("do_") == 0: self.printPair(attribute[3:], getattr(self, attribute).__doc__[1:]) print "" else: command = args.split()[0].strip() try: obj = getattr(self, "do_%s" % command) except: print "There's no such %s command" % command return self.printPair(command, obj.__doc__[1:]) def do_shell(self, args): """Execute a shell command usage !<shell_command> """ comm = args res = shellCall(0, comm) if res['OK'] and res['Value'][0] == 0: _returnCode, stdOut, stdErr = res['Value'] print "%s\n%s" % (stdOut, stdErr) else: print res['Message'] def check_params(self, args, num): """Checks if the number of parameters correct""" argss = args.split() length = len(argss) if length < num: print "Error: Number of arguments provided %d less that required %d, please correct." % ( length, num) return (False, length) return (argss, length) def check_id_or_name(self, id_or_name): """resolve name or Id by converting type of argument """ if id_or_name.isdigit(): return long(id_or_name) # its look like id return id_or_name #################################################################### # # These are the methods for transformation manipulation # def do_getall(self, args): """Get transformation details usage: getall [Status] [Status] """ oTrans = Transformation() oTrans.getTransformations(transStatus=args.split(), printOutput=True) def do_getAllByUser(self, args): """Get all transformations created by a given user The first argument is the authorDN or username. The authorDN is preferred: it need to be inside quotes because contains white spaces. Only authorDN should be quoted. When the username is provided instead, the authorDN is retrieved from the uploaded proxy, so that the retrieved transformations are those created by the user who uploaded that proxy: that user could be different that the username provided to the function. 
usage: getAllByUser authorDN or username [Status] [Status] """ oTrans = Transformation() argss = args.split() username = "" author = "" status = [] if not len(argss) > 0: print self.do_getAllByUser.__doc__ return # if the user didnt quoted the authorDN ends if '=' in argss[0] and argss[0][0] not in ["'", '"']: print "AuthorDN need to be quoted (just quote that argument)" return if argss[0][0] in ["'", '"']: # authorDN given author = argss[0] status_idx = 1 for arg in argss[1:]: author += ' ' + arg status_idx += 1 if arg[-1] in ["'", '"']: break # At this point we should have something like 'author' if not author[0] in ["'", '"'] or not author[-1] in ["'", '"']: print "AuthorDN need to be quoted (just quote that argument)" return else: author = author[1:-1] # throw away the quotes # the rest are the requested status status = argss[status_idx:] else: # username given username = argss[0] status = argss[1:] oTrans.getTransformationsByUser(authorDN=author, userName=username, transStatus=status, printOutput=True) def do_summaryTransformations(self, args): """Show the summary for a list of Transformations Fields starting with 'F' ('J') refers to files (jobs). Proc. stand for processed. Usage: summaryTransformations <ProdID> [<ProdID> ...] """ argss = args.split() if not len(argss) > 0: print self.do_summaryTransformations.__doc__ return transid = argss oTrans = Transformation() oTrans.getSummaryTransformations(transID=transid) def do_getStatus(self, args): """Get transformation details usage: getStatus <transName|ID> """ argss = args.split() if not len(argss) > 0: print "no transformation supplied" return for transName in argss: res = self.server.getTransformation(transName) if not res['OK']: print "Getting status of %s failed: %s" % (transName, res['Message']) else: print "%s: %s" % (transName, res['Value']['Status']) def do_setStatus(self, args): """Set transformation status usage: setStatus <Status> <transName|ID> Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'> """ argss = args.split() if not len(argss) > 1: print "transformation and status not supplied" return status = argss[0] transNames = argss[1:] for transName in transNames: res = self.server.setTransformationParameter( transName, 'Status', status) if not res['OK']: print "Setting status of %s failed: %s" % (transName, res['Message']) else: print "%s set to %s" % (transName, status) def do_start(self, args): """Start transformation usage: start <transName|ID> """ argss = args.split() if not len(argss) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'Status', 'Active') if not res['OK']: print "Setting Status of %s failed: %s" % (transName, res['Message']) else: res = self.server.setTransformationParameter( transName, 'AgentType', 'Automatic') if not res['OK']: print "Setting AgentType of %s failed: %s" % ( transName, res['Message']) else: print "%s started" % transName def do_stop(self, args): """Stop transformation usage: stop <transID|ID> """ argss = args.split() if not len(argss) > 0: print "no transformation supplied" return if not len(argss) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'AgentType', 'Manual') if not res['OK']: print "Stopping of %s failed: %s" % (transName, res['Message']) else: print "%s stopped" % transName def do_flush(self, args): """Flush transformation usage: flush <transName|ID> """ argss = args.split() if not len(argss) > 0: print "no 
transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'Status', 'Flush') if not res['OK']: print "Flushing of %s failed: %s" % (transName, res['Message']) else: print "%s flushing" % transName def do_get(self, args): """Get transformation definition usage: get <transName|ID> """ argss = args.split() if not len(argss) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformation(transName) if not res['OK']: print "Failed to get %s: %s" % (transName, res['Message']) else: res['Value'].pop('Body') printDict(res['Value']) def do_getBody(self, args): """Get transformation body usage: getBody <transName|ID> """ argss = args.split() if not len(argss) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformation(transName) if not res['OK']: print "Failed to get %s: %s" % (transName, res['Message']) else: print res['Value']['Body'] def do_getFileStat(self, args): """Get transformation file statistics usage: getFileStat <transName|ID> """ argss = args.split() if not len(argss) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformationStats(transName) if not res['OK']: print "Failed to get statistics for %s: %s" % (transName, res['Message']) else: res['Value'].pop('Total') printDict(res['Value']) def do_modMask(self, args): """Modify transformation input definition usage: modInput <mask> <transName|ID> """ argss = args.split() if not len(argss) > 0: print "no transformation supplied" return mask = argss[0] transNames = argss[1:] for transName in transNames: res = self.server.setTransformationParameter( transName, "FileMask", mask) if not res['OK']: print "Failed to modify input file mask for %s: %s" % ( transName, res['Message']) else: print "Updated %s filemask" % transName def do_getFiles(self, args): """Get files for the transformation (optionally with a given status) usage: getFiles <transName|ID> [Status] [Status] """ argss = args.split() if not len(argss) > 0: print "no transformation supplied" return transName = argss[0] status = argss[1:] res = self.server.getTransformation(transName) if not res['OK']: print "Failed to get transformation information: %s" % res[ 'Message'] else: selectDict = {'TransformationID': res['Value']['TransformationID']} if status: selectDict['Status'] = status res = self.server.getTransformationFiles(condDict=selectDict) if not res['OK']: print "Failed to get transformation files: %s" % res['Message'] elif res['Value']: self._printFormattedDictList( res['Value'], ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'], 'LFN', 'LFN') else: print "No files found" def do_getFileStatus(self, args): """Get file(s) status for the given transformation usage: getFileStatus <transName|ID> <lfn> [<lfn>...] 
""" argss = args.split() if len(argss) < 2: print "transformation and file not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.getTransformation(transName) if not res['OK']: print "Failed to get transformation information: %s" % res[ 'Message'] else: selectDict = {'TransformationID': res['Value']['TransformationID']} res = self.server.getTransformationFiles(condDict=selectDict) if not res['OK']: print "Failed to get transformation files: %s" % res['Message'] elif res['Value']: filesList = [] for fileDict in res['Value']: if fileDict['LFN'] in lfns: filesList.append(fileDict) if filesList: self._printFormattedDictList(filesList, [ 'LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate' ], 'LFN', 'LFN') else: print "Could not find any LFN in", lfns, "for transformation", transName else: print "No files found" def do_getOutputFiles(self, args): """Get output files for the transformation usage: getOutputFiles <transName|ID> """ argss = args.split() if not len(argss) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformation(transName) if not res['OK']: print "Failed to get transformation information: %s" % res[ 'Message'] else: fc = FileCatalog() meta = {} meta['ProdID'] = transName res = fc.findFilesByMetadata(meta) if not res['OK']: print res['Message'] return if not len(res['Value']) > 0: print 'No output files yet for transformation %d' % int( transName) return else: for lfn in res['Value']: print lfn def do_getInputDataQuery(self, args): """Get input data query for the transformation usage: getInputDataQuery <transName|ID> """ argss = args.split() if not len(argss) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformationInputDataQuery(transName) if not res['OK']: print "Failed to get transformation input data query: %s" % res[ 'Message'] else: print res['Value'] def do_setFileStatus(self, args): """Set file status for the given transformation usage: setFileStatus <transName|ID> <lfn> <status> """ argss = args.split() if not len(argss) == 3: print "transformation file and status not supplied" return transName = argss[0] lfn = argss[1] status = argss[2] res = self.server.setFileStatusForTransformation( transName, status, [lfn]) if not res['OK']: print "Failed to update file status: %s" % res['Message'] else: print "Updated file status to %s" % status def do_resetFile(self, args): """Reset file status for the given transformation usage: resetFile <transName|ID> <lfns> """ argss = args.split() if not len(argss) > 1: print "transformation and file(s) not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns) if not res['OK']: print "Failed to reset file status: %s" % res['Message'] else: if 'Failed' in res['Value']: print "Could not reset some files: " for lfn, reason in res['Value']['Failed'].items(): print lfn, reason else: print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns) def do_resetProcessedFile(self, args): """ Reset file status for the given transformation usage: resetFile <transName|ID> <lfn> """ argss = args.split() if not len(argss) > 1: print "transformation and file(s) not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.setFileStatusForTransformation(transName, 'Unused', lfns, force=True) if not res['OK']: print "Failed to reset file status: %s" % res['Message'] else: if 'Failed' in res['Value'] and res['Value']['Failed']: print "Could not reset 
some files: " for lfn, reason in res['Value']['Failed'].items(): print lfn, reason else: print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns) #################################################################### # # These are the methods for file manipulation # def do_addDirectory(self, args): """Add files from the given catalog directory usage: addDirectory <directory> [directory] """ argss = args.split() if not len(argss) > 0: print "no directory supplied" return for directory in argss: res = self.server.addDirectory(directory, force=True) if not res['OK']: print 'failed to add directory %s: %s' % (directory, res['Message']) else: print 'added %s files for %s' % (res['Value'], directory) def do_replicas(self, args): """ Get replicas for <path> usage: replicas <lfn> [lfn] """ argss = args.split() if not len(argss) > 0: print "no files supplied" return res = self.server.getReplicas(argss) if not res['OK']: print "failed to get any replica information: %s" % res['Message'] return for lfn in sorted(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to get replica information for %s: %s" % (lfn, error) for lfn in sorted(res['Value']['Successful'].keys()): ses = sorted(res['Value']['Successful'][lfn].keys()) outStr = "%s :" % lfn.ljust(100) for se in ses: outStr = "%s %s" % (outStr, se.ljust(15)) print outStr def do_addFile(self, args): """Add new files to transformation DB usage: addFile <lfn> [lfn] """ argss = args.split() if not len(argss) > 0: print "no files supplied" return lfnDict = {} for lfn in argss: lfnDict[lfn] = { 'PFN': 'IGNORED-PFN', 'SE': 'IGNORED-SE', 'Size': 0, 'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM' } res = self.server.addFile(lfnDict, force=True) if not res['OK']: print "failed to add any files: %s" % res['Message'] return for lfn in sorted(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to add %s: %s" % (lfn, error) for lfn in sorted(res['Value']['Successful'].keys()): print "added %s" % lfn def do_removeFile(self, args): """Remove file from transformation DB usage: removeFile <lfn> [lfn] """ argss = args.split() if not len(argss) > 0: print "no files supplied" return res = self.server.removeFile(argss) if not res['OK']: print "failed to remove any files: %s" % res['Message'] return for lfn in sorted(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to remove %s: %s" % (lfn, error) for lfn in sorted(res['Value']['Successful'].keys()): print "removed %s" % lfn def do_addReplica(self, args): """ Add new replica to the transformation DB usage: addReplica <lfn> <se> """ argss = args.split() if not len(argss) == 2: print "no file info supplied" return lfn = argss[0] se = argss[1] lfnDict = {} lfnDict[lfn] = { 'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0, 'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM' } res = self.server.addReplica(lfnDict, force=True) if not res['OK']: print "failed to add replica: %s" % res['Message'] return for lfn in sorted(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to add replica: %s" % (error) for lfn in sorted(res['Value']['Successful'].keys()): print "added %s" % lfn def do_removeReplica(self, args): """Remove replica from the transformation DB usage: removeReplica <lfn> <se> """ argss = args.split() if not len(argss) == 2: print "no file info supplied" return lfn = argss[0] se = argss[1] lfnDict = {} lfnDict[lfn] = { 'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0, 'GUID': 'IGNORED-GUID', 
'Checksum': 'IGNORED-CHECKSUM' } res = self.server.removeReplica(lfnDict) if not res['OK']: print "failed to remove replica: %s" % res['Message'] return for lfn in sorted(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to remove replica: %s" % (error) for lfn in sorted(res['Value']['Successful'].keys()): print "removed %s" % lfn def do_setReplicaStatus(self, args): """Set replica status, usually used to mark a replica Problematic usage: setReplicaStatus <lfn> <status> <se> """ argss = args.split() if not len(argss) > 2: print "no file info supplied" return lfn = argss[0] status = argss[1] se = argss[2] lfnDict = {} lfnDict[lfn] = { 'Status': status, 'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0, 'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM' } res = self.server.setReplicaStatus(lfnDict) if not res['OK']: print "failed to set replica status: %s" % res['Message'] return for lfn in sorted(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to set replica status: %s" % (error) for lfn in sorted(res['Value']['Successful'].keys()): print "updated replica status %s" % lfn
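# --- Illustrative sketch (not part of the original module) -------------------
# Helper showing the file dictionary the CLI commands above (addFile,
# addReplica, removeReplica, setReplicaStatus) build for the server: fields the
# TransformationDB does not use are filled with 'IGNORED-*' placeholders.
# The LFN and SE names below are illustrative only.
def make_lfn_dict(lfn, se='IGNORED-SE', status=None):
  entry = {'PFN': 'IGNORED-PFN',
           'SE': se,
           'Size': 0,
           'GUID': 'IGNORED-GUID',
           'Checksum': 'IGNORED-CHECKSUM'}
  if status is not None:
    entry['Status'] = status
  return {lfn: entry}

# Example: make_lfn_dict('/vo/prod/some/file.dst', se='SOME-SE', status='Problematic')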
class Transformation( API ): ############################################################################# def __init__( self, transID = 0, transClient = None ): """ c'tor """ super( Transformation, self ).__init__() self.paramTypes = { 'TransformationID' : [types.IntType, types.LongType], 'TransformationName' : types.StringTypes, 'Status' : types.StringTypes, 'Description' : types.StringTypes, 'LongDescription' : types.StringTypes, 'Type' : types.StringTypes, 'Plugin' : types.StringTypes, 'AgentType' : types.StringTypes, 'FileMask' : types.StringTypes, 'TransformationGroup' : types.StringTypes, 'GroupSize' : [types.IntType, types.LongType, types.FloatType], 'InheritedFrom' : [types.IntType, types.LongType], 'Body' : types.StringTypes, 'MaxNumberOfTasks' : [types.IntType, types.LongType], 'EventsPerTask' : [types.IntType, types.LongType]} self.paramValues = { 'TransformationID' : 0, 'TransformationName' : '', 'Status' : 'New', 'Description' : '', 'LongDescription' : '', 'Type' : '', 'Plugin' : 'Standard', 'AgentType' : 'Manual', 'FileMask' : '', 'TransformationGroup' : 'General', 'GroupSize' : 1, 'InheritedFrom' : 0, 'Body' : '', 'MaxNumberOfTasks' : 0, 'EventsPerTask' : 0} self.supportedPlugins = ['Broadcast', 'Standard', 'BySize', 'ByShare'] if not transClient: self.transClient = TransformationClient() else: self.transClient = transClient self.serverURL = self.transClient.getServer() self.exists = False if transID: self.paramValues['TransformationID'] = transID res = self.getTransformation() if res['OK']: self.exists = True elif res['Message'] == 'Transformation does not exist': raise AttributeError, 'TransformationID %d does not exist' % transID else: self.paramValues['TransformationID'] = 0 gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID, self.transClient.serverURL ) ) def setServer( self, server ): self.serverURL = server self.transClient.setServer( self.serverURL ) def getServer( self ): return self.serverURL def reset( self, transID = 0 ): self.__init__( transID ) self.transClient.setServer( self.serverURL ) return S_OK() def setTargetSE( self, seList ): return self.__setSE( 'TargetSE', seList ) def setSourceSE( self, seList ): return self.__setSE( 'SourceSE', seList ) def __setSE( self, se, seList ): if type( seList ) in types.StringTypes: try: seList = eval( seList ) except: seList = seList.replace( ',', ' ' ).split() res = self.__checkSEs( seList ) if not res['OK']: return res self.item_called = se return self.__setParam( seList ) def __getattr__( self, name ): if name.find( 'get' ) == 0: item = name[3:] self.item_called = item return self.__getParam if name.find( 'set' ) == 0: item = name[3:] self.item_called = item return self.__setParam raise AttributeError, name def __getParam( self ): if self.item_called == 'Available': return S_OK( self.paramTypes.keys() ) if self.item_called == 'Parameters': return S_OK( self.paramValues ) if self.item_called in self.paramValues: return S_OK( self.paramValues[self.item_called] ) raise AttributeError, "Unknown parameter for transformation: %s" % self.item_called def __setParam( self, value ): change = False if self.item_called in self.paramTypes: oldValue = self.paramValues[self.item_called] if oldValue != value: if type( value ) in self.paramTypes[self.item_called]: change = True else: raise TypeError, "%s %s %s expected one of %s" % ( self.item_called, value, type( value ), self.paramTypes[self.item_called] ) if not self.item_called in self.paramTypes.keys(): if not self.paramValues.has_key( 
self.item_called ): change = True else: oldValue = self.paramValues[self.item_called] if oldValue != value: change = True if not change: gLogger.verbose( "No change of parameter %s required" % self.item_called ) else: gLogger.verbose( "Parameter %s to be changed" % self.item_called ) transID = self.paramValues['TransformationID'] if self.exists and transID: res = self.transClient.setTransformationParameter( transID, self.item_called, value ) if not res['OK']: return res self.paramValues[self.item_called] = value return S_OK() def getTransformation( self, printOutput = False ): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() res = self.transClient.getTransformation( transID, extraParams = True ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res transParams = res['Value'] for paramName, paramValue in transParams.items(): setter = None setterName = "set%s" % paramName if hasattr( self, setterName ) and callable( getattr( self, setterName ) ): setter = getattr( self, setterName ) if not setterName: gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName ) continue setter( paramValue ) if printOutput: gLogger.info( "No printing available yet" ) return S_OK( transParams ) def getTransformationLogging( self, printOutput = False ): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() res = self.transClient.getTransformationLogging( transID ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res loggingList = res['Value'] if printOutput: self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' ) return S_OK( loggingList ) def extendTransformation( self, nTasks, printOutput = False ): return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput ) def cleanTransformation( self, printOutput = False ): res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput ) if res['OK']: self.paramValues['Status'] = 'Cleaned' return res def deleteTransformation( self, printOutput = False ): res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput ) if res['OK']: self.reset() return res def addFilesToTransformation( self, lfns, printOutput = False ): return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput ) def setFileStatusForTransformation( self, status, lfns, printOutput = False ): return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput ) def getTransformationTaskStats( self, printOutput = False ): return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput ) def getTransformationStats( self, printOutput = False ): return self.__executeOperation( 'getTransformationStats', printOutput = printOutput ) def deleteTasks( self, taskMin, taskMax, printOutput = False ): return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput ) def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ): return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput ) def setTaskStatus( self, taskID, status, printOutput = False ): return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput ) def __executeOperation( self, operation, *parms, **kwds ): transID = 
self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() printOutput = kwds.pop( 'printOutput' ) fcn = None if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ): fcn = getattr( self.transClient, operation ) if not fcn: return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" ) res = fcn( transID, *parms, **kwds ) if printOutput: self._prettyPrint( res ) return res def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate'], orderBy = 'FileID', printOutput = False ): condDict = {'TransformationID':self.paramValues['TransformationID']} if fileStatus: condDict['Status'] = fileStatus if lfns: condDict['LFN'] = lfns res = self.transClient.getTransformationFiles( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % string.join( res['ParameterNames'] ) ) elif not res['Value']: gLogger.info( "No tasks found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy ) return res def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID', 'TargetSE', 'CreationTime', 'LastUpdateTime'], orderBy = 'TaskID', printOutput = False ): condDict = {'TransformationID':self.paramValues['TransformationID']} if taskStatus: condDict['ExternalStatus'] = taskStatus if taskIDs: condDict['TaskID'] = taskIDs res = self.transClient.getTransformationTasks( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % string.join( res['ParameterNames'] ) ) elif not res['Value']: gLogger.info( "No tasks found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy ) return res ############################################################################# def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate'], orderBy = 'TransformationID', printOutput = False ): condDict = {} if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % string.join( res['ParameterNames'] ) ) elif not res['Value']: gLogger.info( "No tasks found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy ) return res ############################################################################# def addTransformation( self, addFiles = True, printOutput = False ): res = self._checkCreation() if not res['OK']: return self._errorReport( res, 'Failed transformation sanity check' ) if printOutput: gLogger.info( "Will attempt to create transformation with the following parameters" ) self._prettyPrint( self.paramValues ) res = self.transClient.addTransformation( self.paramValues['TransformationName'], self.paramValues['Description'], self.paramValues['LongDescription'], self.paramValues['Type'], 
self.paramValues['Plugin'], self.paramValues['AgentType'], self.paramValues['FileMask'], transformationGroup = self.paramValues['TransformationGroup'], groupSize = self.paramValues['GroupSize'], inheritedFrom = self.paramValues['InheritedFrom'], body = self.paramValues['Body'], maxTasks = self.paramValues['MaxNumberOfTasks'], eventsPerTask = self.paramValues['EventsPerTask'], addFiles = addFiles ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res transID = res['Value'] self.exists = True self.setTransformationID( transID ) gLogger.info( "Created transformation %d" % transID ) for paramName, paramValue in self.paramValues.items(): if not self.paramTypes.has_key( paramName ): res = self.transClient.setTransformationParameter( transID, paramName, paramValue ) if not res['OK']: gLogger.error( "Failed to add parameter", "%s %s" % ( paramName, res['Message'] ) ) gLogger.info( "To add this parameter later please execute the following." ) gLogger.info( "oTransformation = Transformation(%d)" % transID ) gLogger.info( "oTransformation.set%s(...)" % paramName ) return S_OK( transID ) def _checkCreation( self ): if self.paramValues['TransformationID']: gLogger.info( "You are currently working with an active transformation definition." ) gLogger.info( "If you wish to create a new transformation reset the TransformationID." ) gLogger.info( "oTransformation.reset()" ) return S_ERROR() requiredParameters = ['TransformationName', 'Description' , 'LongDescription', 'Type'] for parameter in requiredParameters: if not self.paramValues[parameter]: gLogger.info( "%s is not defined for this transformation. This is required..." % parameter ) res = self.__promptForParameter( parameter ) if not res['OK']: return res plugin = self.paramValues['Plugin'] if not plugin in self.supportedPlugins: gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin ) res = self.__promptForParameter( 'Plugin', choices = self.supportedPlugins, default = 'Standard' ) if not res['OK']: return res plugin = self.paramValues['Plugin'] checkPlugin = "_check%sPlugin" % plugin fcn = None if hasattr( self, checkPlugin ) and callable( getattr( self, checkPlugin ) ): fcn = getattr( self, checkPlugin ) if not fcn: return S_ERROR( "Unable to invoke %s, it isn't a member function" % checkPlugin ) res = fcn() return res def _checkBySizePlugin( self ): return self._checkStandardPlugin() def _checkBySharePlugin( self ): return self._checkStandardPlugin() def _checkStandardPlugin( self ): groupSize = self.paramValues['GroupSize'] if ( groupSize <= 0 ): gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." ) res = self.setGroupSize( 1 ) if not res['OK']: return res return S_OK() def _checkBroadcastPlugin( self ): gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % ( string.join( ['SourceSE', 'TargetSE'], ', ' ) ) ) requiredParams = ['SourceSE', 'TargetSE'] for requiredParam in requiredParams: if ( not self.paramValues.has_key( requiredParam ) ) or ( not self.paramValues[requiredParam] ): res = self.__promptForParameter( requiredParam, insert = False ) if not res['OK']: return res paramValue = res['Value'] setter = None setterName = "set%s" % requiredParam if hasattr( self, setterName ) and callable( getattr( self, setterName ) ): setter = getattr( self, setterName ) if not setter: return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." 
% setterName ) ses = paramValue.replace( ',', ' ' ).split() res = setter( ses ) if not res['OK']: return res return S_OK() def __checkSEs( self, seList ): res = gConfig.getSections( '/Resources/StorageElements' ) if not res['OK']: return self._errorReport( res, 'Failed to get possible StorageElements' ) missing = [] for se in seList: if not se in res['Value']: gLogger.error( "StorageElement %s is not known" % se ) missing.append( se ) if missing: return S_ERROR( "%d StorageElements not known" % len( missing ) ) return S_OK() def __promptForParameter( self, parameter, choices = [], default = '', insert = True ): res = promptUser( "Please enter %s" % parameter, choices = choices, default = default ) if not res['OK']: return self._errorReport( res ) gLogger.info( "%s will be set to '%s'" % ( parameter, res['Value'] ) ) paramValue = res['Value'] if insert: setter = None setterName = "set%s" % parameter if hasattr( self, setterName ) and callable( getattr( self, setterName ) ): setter = getattr( self, setterName ) if not setter: return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" ) res = setter( paramValue ) if not res['OK']: return res return S_OK( paramValue )
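A rough usage sketch of the API class above, showing how a production-manager script would typically drive it; the import paths mirror the usual client layout, and the transformation name and SE names are illustrative assumptions rather than values taken from this code:

# Hedged usage sketch for the Transformation API class above.
# Import paths, the transformation name and the SE names are illustrative assumptions.
from DIRAC.Core.Base import Script
Script.parseCommandLine()

from DIRAC import gLogger, exit as dexit
from DIRAC.TransformationSystem.Client.Transformation import Transformation

t = Transformation()
t.setTransformationName("Example-Replication-01")   # must be unique
t.setType("Replication")
t.setDescription("Example replication")
t.setLongDescription("Longer description of the example replication")
t.setPlugin("Broadcast")            # Broadcast requires SourceSE and TargetSE
t.setSourceSE(["SOURCE-SE"])        # illustrative SE names, checked against the CS
t.setTargetSE(["TARGET-SE"])

res = t.addTransformation()         # sanity checks run here, then the DB entry is created
if not res['OK']:
    gLogger.error("Failed to create transformation", res['Message'])
    dexit(1)
t.setAgentType("Automatic")
t.setStatus("Active")
dexit(0)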
class TransformationCleaningAgent(AgentModule): """ .. class:: TransformationCleaningAgent :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance :param ~TransformationClient.TransformationClient transClient: TransformationClient instance :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance """ def __init__(self, *args, **kwargs): """ c'tor """ AgentModule.__init__(self, *args, **kwargs) self.shifterProxy = None # # transformation client self.transClient = None # # wms client self.wmsClient = None # # request client self.reqClient = None # # file catalog client self.metadataClient = None # # transformations types self.transformationTypes = None # # directory locations self.directoryLocations = ['TransformationDB', 'MetadataCatalog'] # # transformation metadata self.transfidmeta = 'TransformationID' # # archive periof in days self.archiveAfter = 7 # # transformation log SEs self.logSE = 'LogSE' # # enable/disable execution self.enableFlag = 'True' self.dataProcTTypes = ['MCSimulation', 'Merge'] self.dataManipTTypes = ['Replication', 'Removal'] def initialize(self): """ agent initialisation reading and setting confing opts :param self: self reference """ # # shifter proxy # See cleanContent method: this proxy will be used ALSO when the file catalog used # is the DIRAC File Catalog (DFC). # This is possible because of unset of the "UseServerCertificate" option self.shifterProxy = self.am_getOption('shifterProxy', self.shifterProxy) # # transformations types self.dataProcTTypes = Operations().getValue('Transformations/DataProcessing', self.dataProcTTypes) self.dataManipTTypes = Operations().getValue('Transformations/DataManipulation', self.dataManipTTypes) agentTSTypes = self.am_getOption('TransformationTypes', []) if agentTSTypes: self.transformationTypes = sorted(agentTSTypes) else: self.transformationTypes = sorted(self.dataProcTTypes + self.dataManipTTypes) self.log.info("Will consider the following transformation types: %s" % str(self.transformationTypes)) # # directory locations self.directoryLocations = sorted(self.am_getOption('DirectoryLocations', self.directoryLocations)) self.log.info("Will search for directories in the following locations: %s" % str(self.directoryLocations)) # # transformation metadata self.transfidmeta = self.am_getOption('TransfIDMeta', self.transfidmeta) self.log.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta) # # archive periof in days self.archiveAfter = self.am_getOption('ArchiveAfter', self.archiveAfter) # days self.log.info("Will archive Completed transformations after %d days" % self.archiveAfter) # # transformation log SEs self.logSE = Operations().getValue('/LogStorage/LogSE', self.logSE) self.log.info("Will remove logs found on storage element: %s" % self.logSE) # # transformation client self.transClient = TransformationClient() # # wms client self.wmsClient = WMSClient() # # request client self.reqClient = ReqClient() # # file catalog client self.metadataClient = FileCatalogClient() return S_OK() ############################################################################# def execute(self): """ execution in one agent's cycle :param self: self reference """ self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag) if self.enableFlag != 'True': self.log.info('TransformationCleaningAgent is disabled by configuration option EnableFlag') return S_OK('Disabled via CS flag') # Obtain the transformations in Cleaning status and remove any mention of 
the jobs/files res = self.transClient.getTransformations({'Status': 'Cleaning', 'Type': self.transformationTypes}) if res['OK']: for transDict in res['Value']: if self.shifterProxy: self._executeClean(transDict) else: self.log.info("Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeClean)(transDict, proxyUserDN=transDict['AuthorDN'], proxyUserGroup=transDict['AuthorGroup']) else: self.log.error("Failed to get transformations", res['Message']) # Obtain the transformations in RemovingFiles status and removes the output files res = self.transClient.getTransformations({'Status': 'RemovingFiles', 'Type': self.transformationTypes}) if res['OK']: for transDict in res['Value']: if self.shifterProxy: self._executeRemoval(transDict) else: self.log.info("Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeRemoval)(transDict, proxyUserDN=transDict['AuthorDN'], proxyUserGroup=transDict['AuthorGroup']) else: self.log.error("Could not get the transformations", res['Message']) # Obtain the transformations in Completed status and archive if inactive for X days olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter) res = self.transClient.getTransformations({'Status': 'Completed', 'Type': self.transformationTypes}, older=olderThanTime, timeStamp='LastUpdate') if res['OK']: for transDict in res['Value']: if self.shifterProxy: self._executeArchive(transDict) else: self.log.info("Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeArchive)(transDict, proxyUserDN=transDict['AuthorDN'], proxyUserGroup=transDict['AuthorGroup']) else: self.log.error("Could not get the transformations", res['Message']) return S_OK() def _executeClean(self, transDict): """Clean transformation.""" # if transformation is of type `Replication` or `Removal`, there is nothing to clean. # We just archive if transDict['Type'] in self.dataManipTTypes: res = self.archiveTransformation(transDict['TransformationID']) if not res['OK']: self.log.error("Problems archiving transformation %s: %s" % (transDict['TransformationID'], res['Message'])) else: res = self.cleanTransformation(transDict['TransformationID']) if not res['OK']: self.log.error("Problems cleaning transformation %s: %s" % (transDict['TransformationID'], res['Message'])) def _executeRemoval(self, transDict): """Remove files from given transformation.""" res = self.removeTransformationOutput(transDict['TransformationID']) if not res['OK']: self.log.error("Problems removing transformation %s: %s" % (transDict['TransformationID'], res['Message'])) def _executeArchive(self, transDict): """Archive the given transformation.""" res = self.archiveTransformation(transDict['TransformationID']) if not res['OK']: self.log.error("Problems archiving transformation %s: %s" % (transDict['TransformationID'], res['Message'])) return S_OK() ############################################################################# # # Get the transformation directories for checking # def getTransformationDirectories(self, transID): """ get the directories for the supplied transformation from the transformation system. These directories are used by removeTransformationOutput and cleanTransformation for removing output. 
:param self: self reference :param int transID: transformation ID """ self.log.verbose("Cleaning Transformation directories of transformation %d" % transID) directories = [] if 'TransformationDB' in self.directoryLocations: res = self.transClient.getTransformationParameters(transID, ['OutputDirectories']) if not res['OK']: self.log.error("Failed to obtain transformation directories", res['Message']) return res transDirectories = [] if res['Value']: if not isinstance(res['Value'], list): try: transDirectories = ast.literal_eval(res['Value']) except BaseException: # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]' transDirectories.append(res['Value']) else: transDirectories = res['Value'] directories = self._addDirs(transID, transDirectories, directories) if 'MetadataCatalog' in self.directoryLocations: res = self.metadataClient.findDirectoriesByMetadata({self.transfidmeta: transID}) if not res['OK']: self.log.error("Failed to obtain metadata catalog directories", res['Message']) return res transDirectories = res['Value'] directories = self._addDirs(transID, transDirectories, directories) if not directories: self.log.info("No output directories found") directories = sorted(directories) return S_OK(directories) @classmethod def _addDirs(cls, transID, newDirs, existingDirs): """ append unique :newDirs: list to :existingDirs: list :param self: self reference :param int transID: transformationID :param list newDirs: src list of paths :param list existingDirs: dest list of paths """ for folder in newDirs: transStr = str(transID).zfill(8) if re.search(transStr, str(folder)): if folder not in existingDirs: existingDirs.append(os.path.normpath(folder)) return existingDirs ############################################################################# # # These are the methods for performing the cleaning of catalogs and storage # def cleanContent(self, directory): """ wipe out everything from catalog under folder :directory: :param self: self reference :params str directory: folder name """ self.log.verbose("Cleaning Catalog contents") res = self.__getCatalogDirectoryContents([directory]) if not res['OK']: return res filesFound = res['Value'] if not filesFound: self.log.info("No files are registered in the catalog directory %s" % directory) return S_OK() self.log.info("Attempting to remove %d possible remnants from the catalog and storage" % len(filesFound)) # Executing with shifter proxy gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false') res = DataManager().removeFile(filesFound, force=True) gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true') if not res['OK']: return res realFailure = False for lfn, reason in res['Value']['Failed'].items(): if "File does not exist" in str(reason): self.log.warn("File %s not found in some catalog: " % (lfn)) else: self.log.error("Failed to remove file found in the catalog", "%s %s" % (lfn, reason)) realFailure = True if realFailure: return S_ERROR("Failed to remove all files found in the catalog") return S_OK() def __getCatalogDirectoryContents(self, directories): """ get catalog contents under paths :directories: :param self: self reference :param list directories: list of paths in catalog """ self.log.info('Obtaining the catalog contents for %d directories:' % len(directories)) for directory in directories: self.log.info(directory) activeDirs = directories allFiles = {} fc = FileCatalog() while activeDirs: currentDir = activeDirs[0] res = 
returnSingleResult(fc.listDirectory(currentDir)) activeDirs.remove(currentDir) if not res['OK'] and 'Directory does not exist' in res['Message']: # FIXME: DFC should return errno self.log.info("The supplied directory %s does not exist" % currentDir) elif not res['OK']: if "No such file or directory" in res['Message']: self.log.info("%s: %s" % (currentDir, res['Message'])) else: self.log.error("Failed to get directory %s content: %s" % (currentDir, res['Message'])) else: dirContents = res['Value'] activeDirs.extend(dirContents['SubDirs']) allFiles.update(dirContents['Files']) self.log.info("Found %d files" % len(allFiles)) return S_OK(allFiles.keys()) def cleanTransformationLogFiles(self, directory): """ clean up transformation logs from directory :directory: :param self: self reference :param str directory: folder name """ self.log.verbose("Removing log files found in the directory %s" % directory) res = returnSingleResult(StorageElement(self.logSE).removeDirectory(directory, recursive=True)) if not res['OK']: if cmpError(res, errno.ENOENT): # No such file or directory self.log.warn("Transformation log directory does not exist", directory) return S_OK() self.log.error("Failed to remove log files", res['Message']) return res self.log.info("Successfully removed transformation log directory") return S_OK() ############################################################################# # # These are the functional methods for archiving and cleaning transformations # def removeTransformationOutput(self, transID): """ This just removes any mention of the output data from the catalog and storage """ self.log.info("Removing output data for transformation %s" % transID) res = self.getTransformationDirectories(transID) if not res['OK']: self.log.error('Problem obtaining directories for transformation %s with result "%s"' % (transID, res)) return S_OK() directories = res['Value'] for directory in directories: if not re.search('/LOG/', directory): res = self.cleanContent(directory) if not res['OK']: return res self.log.info("Removed %d directories from the catalog \ and its files from the storage for transformation %s" % (len(directories), transID)) # Clean ALL the possible remnants found in the metadata catalog res = self.cleanMetadataCatalogFiles(transID) if not res['OK']: return res self.log.info("Successfully removed output of transformation %d" % transID) # Change the status of the transformation to RemovedFiles res = self.transClient.setTransformationParameter(transID, 'Status', 'RemovedFiles') if not res['OK']: self.log.error("Failed to update status of transformation %s to RemovedFiles" % (transID), res['Message']) return res self.log.info("Updated status of transformation %s to RemovedFiles" % (transID)) return S_OK() def archiveTransformation(self, transID): """ This just removes job from the jobDB and the transformation DB :param self: self reference :param int transID: transformation ID """ self.log.info("Archiving transformation %s" % transID) # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks(transID) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation(transID) if not res['OK']: return res self.log.info("Successfully archived transformation %d" % transID) # Change the status of the transformation to archived res = self.transClient.setTransformationParameter(transID, 'Status', 'Archived') if not res['OK']: self.log.error("Failed to update status of transformation 
%s to Archived" % (transID), res['Message']) return res self.log.info("Updated status of transformation %s to Archived" % (transID)) return S_OK() def cleanTransformation(self, transID): """ This removes what was produced by the supplied transformation, leaving only some info and log in the transformation DB. """ self.log.info("Cleaning transformation %s" % transID) res = self.getTransformationDirectories(transID) if not res['OK']: self.log.error('Problem obtaining directories for transformation %s with result "%s"' % (transID, res)) return S_OK() directories = res['Value'] # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks(transID) if not res['OK']: return res # Clean the log files for the jobs for directory in directories: if re.search('/LOG/', directory): res = self.cleanTransformationLogFiles(directory) if not res['OK']: return res res = self.cleanContent(directory) if not res['OK']: return res # Clean ALL the possible remnants found res = self.cleanMetadataCatalogFiles(transID) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation(transID) if not res['OK']: return res self.log.info("Successfully cleaned transformation %d" % transID) res = self.transClient.setTransformationParameter(transID, 'Status', 'Cleaned') if not res['OK']: self.log.error("Failed to update status of transformation %s to Cleaned" % (transID), res['Message']) return res self.log.info("Updated status of transformation %s to Cleaned" % (transID)) return S_OK() def cleanMetadataCatalogFiles(self, transID): """ wipe out files from catalog """ res = self.metadataClient.findFilesByMetadata({self.transfidmeta: transID}) if not res['OK']: return res fileToRemove = res['Value'] if not fileToRemove: self.log.info('No files found for transID %s' % transID) return S_OK() # Executing with shifter proxy gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false') res = DataManager().removeFile(fileToRemove, force=True) gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true') if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): self.log.error("Failed to remove file found in metadata catalog", "%s %s" % (lfn, reason)) if res['Value']['Failed']: return S_ERROR("Failed to remove all files found in the metadata catalog") self.log.info("Successfully removed all files found in the BK") return S_OK() ############################################################################# # # These are the methods for removing the jobs from the WMS and transformation DB # def cleanTransformationTasks(self, transID): """ clean tasks from WMS, or from the RMS if it is a DataManipulation transformation """ self.log.verbose("Cleaning Transformation tasks of transformation %d" % transID) res = self.__getTransformationExternalIDs(transID) if not res['OK']: return res externalIDs = res['Value'] if externalIDs: res = self.transClient.getTransformationParameters(transID, ['Type']) if not res['OK']: self.log.error("Failed to determine transformation type") return res transType = res['Value'] if transType in self.dataProcTTypes: res = self.__removeWMSTasks(externalIDs) else: res = self.__removeRequests(externalIDs) if not res['OK']: return res return S_OK() def __getTransformationExternalIDs(self, transID): """ collect all ExternalIDs for transformation :transID: :param self: self reference :param int transID: transforamtion ID """ res = 
self.transClient.getTransformationTasks(condDict={'TransformationID': transID}) if not res['OK']: self.log.error("Failed to get externalIDs for transformation %d" % transID, res['Message']) return res externalIDs = [taskDict['ExternalID'] for taskDict in res["Value"]] self.log.info("Found %d tasks for transformation" % len(externalIDs)) return S_OK(externalIDs) def __removeRequests(self, requestIDs): """ This will remove requests from the RMS system - """ rIDs = [int(long(j)) for j in requestIDs if long(j)] for reqID in rIDs: self.reqClient.cancelRequest(reqID) return S_OK() def __removeWMSTasks(self, transJobIDs): """ wipe out jobs and their requests from the system :param self: self reference :param list trasnJobIDs: job IDs """ # Prevent 0 job IDs jobIDs = [int(j) for j in transJobIDs if int(j)] allRemove = True for jobList in breakListIntoChunks(jobIDs, 500): res = self.wmsClient.killJob(jobList) if res['OK']: self.log.info("Successfully killed %d jobs from WMS" % len(jobList)) elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs" not in res) and ("FailedJobIDs" not in res): self.log.info("Found %s jobs which did not exist in the WMS" % len(res['InvalidJobIDs'])) elif "NonauthorizedJobIDs" in res: self.log.error("Failed to kill %s jobs because not authorized" % len(res['NonauthorizedJobIDs'])) allRemove = False elif "FailedJobIDs" in res: self.log.error("Failed to kill %s jobs" % len(res['FailedJobIDs'])) allRemove = False res = self.wmsClient.deleteJob(jobList) if res['OK']: self.log.info("Successfully removed %d jobs from WMS" % len(jobList)) elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs" not in res) and ("FailedJobIDs" not in res): self.log.info("Found %s jobs which did not exist in the WMS" % len(res['InvalidJobIDs'])) elif "NonauthorizedJobIDs" in res: self.log.error("Failed to remove %s jobs because not authorized" % len(res['NonauthorizedJobIDs'])) allRemove = False elif "FailedJobIDs" in res: self.log.error("Failed to remove %s jobs" % len(res['FailedJobIDs'])) allRemove = False if not allRemove: return S_ERROR("Failed to remove all remnants from WMS") self.log.info("Successfully removed all tasks from the WMS") if not jobIDs: self.log.info("JobIDs not present, unable to remove asociated requests.") return S_OK() failed = 0 failoverRequests = {} res = self.reqClient.getRequestIDsForJobs(jobIDs) if not res['OK']: self.log.error("Failed to get requestID for jobs.", res['Message']) return res failoverRequests.update(res['Value']['Successful']) if not failoverRequests: return S_OK() for jobID, requestID in res['Value']['Successful'].items(): # Put this check just in case, tasks must have associated jobs if jobID == 0 or jobID == '0': continue res = self.reqClient.cancelRequest(requestID) if not res['OK']: self.log.error("Failed to remove request from RequestDB", res['Message']) failed += 1 else: self.log.verbose("Removed request %s associated to job %d." % (requestID, jobID)) if failed: self.log.info("Successfully removed %s requests" % (len(failoverRequests) - failed)) self.log.info("Failed to remove %s requests" % failed) return S_ERROR("Failed to remove all the request from RequestDB") self.log.info("Successfully removed all the associated failover requests") return S_OK()
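The agent above only acts on transformations already sitting in one of its trigger statuses ('Cleaning', 'RemovingFiles', or 'Completed' older than ArchiveAfter days). A hedged sketch of handing an existing transformation over to it from a script; the transformation ID is purely illustrative:

# Hedged sketch: move an existing transformation into one of the statuses the
# TransformationCleaningAgent polls for. The ID 1234 is illustrative only.
from DIRAC.Core.Base import Script
Script.parseCommandLine()

from DIRAC import gLogger, exit as dexit
from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient

tc = TransformationClient()
transID = 1234

# 'Cleaning' triggers _executeClean, 'RemovingFiles' triggers _executeRemoval;
# 'Completed' transformations are archived automatically after ArchiveAfter days.
res = tc.setTransformationParameter(transID, 'Status', 'Cleaning')
if not res['OK']:
    gLogger.error("Failed to update transformation status", res['Message'])
    dexit(1)
gLogger.notice("Transformation %s queued for cleaning" % transID)
dexit(0)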
t.setTransformationName(transferName)  # Must be unique
t.setTransformationGroup("Transfer")
t.setType("Transfer-JUNO")
# t.setPlugin("Standard")  # Not needed. The default is 'Standard'
t.setDescription("Test Data Transfer")
t.setLongDescription("Long description of Data Transfer")  # Mandatory
t.setGroupSize(3)  # Here you specify how many files should be grouped within the same request, e.g. 100
transBody = [("ReplicateAndRegister", {"SourceSE": fromSE, "TargetSE": toSE})]
t.setBody(transBody)  # Mandatory

result = t.addTransformation()  # Transformation is created here
if not result['OK']:
    gLogger.error('Cannot add transformation: %s' % result['Message'])
    exit(2)
t.setStatus("Active")
t.setAgentType("Automatic")

transID = t.getTransformationID()
result = tc.addFilesToTransformation(transID['Value'], infileList)  # Files are added here
if not result['OK']:
    gLogger.error('Cannot add files to transformation: %s' % result['Message'])
    exit(2)
result = tc.setTransformationParameter(transID['Value'], 'Status', 'Flush')
if not result['OK']:
    gLogger.error('Cannot flush transformation: %s' % result['Message'])
    exit(2)
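Once the files are attached and the transformation is flushed, the same client can be used to follow progress. A hedged continuation of the snippet above: it reuses tc and transID, and assumes each returned file record carries a 'Status' field, as the other snippets in this document do; the polling interval and loop count are illustrative.

# Hedged follow-up to the creation script above: poll the attached files until done.
# Reuses tc and transID from that script; interval and loop count are illustrative.
import time

for _ in range(10):
    res = tc.getTransformationFiles(condDict={'TransformationID': transID['Value']})
    if not res['OK']:
        gLogger.error('Cannot get transformation files: %s' % res['Message'])
        break
    statuses = [fileDict['Status'] for fileDict in res['Value']]
    counts = dict((s, statuses.count(s)) for s in set(statuses))
    gLogger.notice('File statuses for transformation %s: %s' % (transID['Value'], counts))
    if statuses and all(s == 'Processed' for s in statuses):
        gLogger.notice('All files processed')
        break
    time.sleep(60)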
class Transformation(API): ############################################################################# def __init__(self, transID=0, transClient=None): """ c'tor """ super(Transformation, self).__init__() self.paramTypes = { 'TransformationID': [types.IntType, types.LongType], 'TransformationName': types.StringTypes, 'Status': types.StringTypes, 'Description': types.StringTypes, 'LongDescription': types.StringTypes, 'Type': types.StringTypes, 'Plugin': types.StringTypes, 'AgentType': types.StringTypes, 'FileMask': types.StringTypes, 'TransformationGroup': types.StringTypes, 'GroupSize': [types.IntType, types.LongType, types.FloatType], 'InheritedFrom': [types.IntType, types.LongType], 'Body': types.StringTypes, 'MaxNumberOfTasks': [types.IntType, types.LongType], 'EventsPerTask': [types.IntType, types.LongType] } self.paramValues = { 'TransformationID': 0, 'TransformationName': '', 'Status': 'New', 'Description': '', 'LongDescription': '', 'Type': '', 'Plugin': 'Standard', 'AgentType': 'Manual', 'FileMask': '', 'TransformationGroup': 'General', 'GroupSize': 1, 'InheritedFrom': 0, 'Body': '', 'MaxNumberOfTasks': 0, 'EventsPerTask': 0 } self.ops = Operations() self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins', ['Broadcast', 'Standard', 'BySize', 'ByShare']) if not transClient: self.transClient = TransformationClient() else: self.transClient = transClient self.serverURL = self.transClient.getServer() self.exists = False if transID: self.paramValues['TransformationID'] = transID res = self.getTransformation() if res['OK']: self.exists = True elif res['Message'] == 'Transformation does not exist': raise AttributeError('TransformationID %d does not exist' % transID) else: self.paramValues['TransformationID'] = 0 gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % (transID, self.transClient.serverURL)) def setServer(self, server): self.serverURL = server self.transClient.setServer(self.serverURL) def getServer(self): return self.serverURL def reset(self, transID=0): self.__init__(transID) self.transClient.setServer(self.serverURL) return S_OK() def setTargetSE(self, seList): return self.__setSE('TargetSE', seList) def setSourceSE(self, seList): return self.__setSE('SourceSE', seList) def __setSE(self, seParam, seList): if isinstance(seList, basestring): try: seList = eval(seList) except: seList = seList.split(',') elif isinstance(seList, (list, dict, tuple)): seList = list(seList) else: return S_ERROR("Bad argument type") res = self.__checkSEs(seList) if not res['OK']: return res self.item_called = seParam return self.__setParam(seList) def __getattr__(self, name): if name.find('get') == 0: item = name[3:] self.item_called = item return self.__getParam if name.find('set') == 0: item = name[3:] self.item_called = item return self.__setParam raise AttributeError(name) def __getParam(self): if self.item_called == 'Available': return S_OK(self.paramTypes.keys()) if self.item_called == 'Parameters': return S_OK(self.paramValues) if self.item_called in self.paramValues: return S_OK(self.paramValues[self.item_called]) raise AttributeError("Unknown parameter for transformation: %s" % self.item_called) def __setParam(self, value): change = False if self.item_called in self.paramTypes: if self.paramValues[self.item_called] != value: if type(value) in self.paramTypes[self.item_called]: change = True else: raise TypeError("%s %s %s expected one of %s" % (self.item_called, value, type(value), self.paramTypes[self.item_called])) else: if self.item_called not in 
self.paramValues: change = True else: if self.paramValues[self.item_called] != value: change = True if not change: gLogger.verbose("No change of parameter %s required" % self.item_called) else: gLogger.verbose("Parameter %s to be changed" % self.item_called) transID = self.paramValues['TransformationID'] if self.exists and transID: res = self.transClient.setTransformationParameter( transID, self.item_called, value) if not res['OK']: return res self.paramValues[self.item_called] = value return S_OK() def getTransformation(self, printOutput=False): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() res = self.transClient.getTransformation(transID, extraParams=True) if not res['OK']: if printOutput: self._prettyPrint(res) return res transParams = res['Value'] for paramName, paramValue in transParams.items(): setter = None setterName = "set%s" % paramName if hasattr(self, setterName) and callable(getattr( self, setterName)): setter = getattr(self, setterName) if not setterName: gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName) continue setter(paramValue) if printOutput: gLogger.info("No printing available yet") return S_OK(transParams) def getTransformationLogging(self, printOutput=False): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() res = self.transClient.getTransformationLogging(transID) if not res['OK']: if printOutput: self._prettyPrint(res) return res loggingList = res['Value'] if printOutput: self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate') return S_OK(loggingList) def extendTransformation(self, nTasks, printOutput=False): return self.__executeOperation('extendTransformation', nTasks, printOutput=printOutput) def cleanTransformation(self, printOutput=False): res = self.__executeOperation('cleanTransformation', printOutput=printOutput) if res['OK']: self.paramValues['Status'] = 'Cleaned' return res def deleteTransformation(self, printOutput=False): res = self.__executeOperation('deleteTransformation', printOutput=printOutput) if res['OK']: self.reset() return res def addFilesToTransformation(self, lfns, printOutput=False): return self.__executeOperation('addFilesToTransformation', lfns, printOutput=printOutput) def setFileStatusForTransformation(self, status, lfns, printOutput=False): return self.__executeOperation('setFileStatusForTransformation', status, lfns, printOutput=printOutput) def getTransformationTaskStats(self, printOutput=False): return self.__executeOperation('getTransformationTaskStats', printOutput=printOutput) def getTransformationStats(self, printOutput=False): return self.__executeOperation('getTransformationStats', printOutput=printOutput) def deleteTasks(self, taskMin, taskMax, printOutput=False): return self.__executeOperation('deleteTasks', taskMin, taskMax, printOutput=printOutput) def addTaskForTransformation(self, lfns=[], se='Unknown', printOutput=False): return self.__executeOperation('addTaskForTransformation', lfns, se, printOutput=printOutput) def setTaskStatus(self, taskID, status, printOutput=False): return self.__executeOperation('setTaskStatus', taskID, status, printOutput=printOutput) def __executeOperation(self, operation, *parms, **kwds): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() printOutput = kwds.pop('printOutput') fcn = 
None if hasattr(self.transClient, operation) and callable( getattr(self.transClient, operation)): fcn = getattr(self.transClient, operation) if not fcn: return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" ) res = fcn(transID, *parms, **kwds) if printOutput: self._prettyPrint(res) return res def getTransformationFiles(self, fileStatus=[], lfns=[], outputFields=[ 'FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate' ], orderBy='FileID', printOutput=False): condDict = {'TransformationID': self.paramValues['TransformationID']} if fileStatus: condDict['Status'] = fileStatus if lfns: condDict['LFN'] = lfns res = self.transClient.getTransformationFiles(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'FileID', orderBy) return res def getTransformationTasks(self, taskStatus=[], taskIDs=[], outputFields=[ 'TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID', 'TargetSE', 'CreationTime', 'LastUpdateTime' ], orderBy='TaskID', printOutput=False): condDict = {'TransformationID': self.paramValues['TransformationID']} if taskStatus: condDict['ExternalStatus'] = taskStatus if taskIDs: condDict['TaskID'] = taskIDs res = self.transClient.getTransformationTasks(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'TaskID', orderBy) return res ############################################################################# def getTransformations(self, transID=[], transStatus=[], outputFields=[ 'TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate' ], orderBy='TransformationID', printOutput=False): condDict = {} if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'TransformationID', orderBy) return res ############################################################################# def getAuthorDNfromProxy(self): """ gets the AuthorDN and username of the transformation from the uploaded proxy """ username = "" author = "" res = getProxyInfo() if res['OK']: author = res['Value']['identity'] username = res['Value']['username'] else: gLogger.error("Unable to get uploaded proxy Info %s " % res['Message']) return S_ERROR(res['Message']) res = {'username': username, 'authorDN': author} return S_OK(res) ############################################################################# def getTransformationsByUser(self, authorDN="", userName="", transID=[], transStatus=[], outputFields=[ 'TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate', 'AuthorDN' ], orderBy='TransformationID', printOutput=False): condDict = {} if authorDN 
== "": res = self.getAuthorDNfromProxy() if not res['OK']: gLogger.error(res['Message']) return S_ERROR(res['Message']) else: foundUserName = res['Value']['username'] foundAuthor = res['Value']['authorDN'] # If the username whom created the uploaded proxy is different than the provided username report error and exit if not (userName == "" or userName == foundUserName): gLogger.error( "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % (userName, foundUserName)) return S_ERROR( "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % (userName, foundUserName)) userName = foundUserName authorDN = foundAuthor gLogger.info( "Will list transformations created by user '%s' with status '%s'" % (userName, ', '.join(transStatus))) else: gLogger.info( "Will list transformations created by '%s' with status '%s'" % (authorDN, ', '.join(transStatus))) condDict['AuthorDN'] = authorDN if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'TransformationID', orderBy) return res ############################################################################# def getSummaryTransformations(self, transID=[]): """Show the summary for a list of Transformations Fields starting with 'F' ('J') refers to files (jobs). Proc. stand for processed. """ condDict = {'TransformationID': transID} orderby = [] start = 0 maxitems = len(transID) paramShowNames = ['TransformationID','Type','Status','Files_Total','Files_PercentProcessed',\ 'Files_Processed','Files_Unused','Jobs_TotalCreated','Jobs_Waiting',\ 'Jobs_Running','Jobs_Done','Jobs_Failed','Jobs_Stalled'] # Below, the header used for each field in the printing: short to fit in one line paramShowNamesShort = ['TransID','Type','Status','F_Total','F_Proc.(%)','F_Proc.',\ 'F_Unused','J_Created','J_Wait','J_Run','J_Done','J_Fail','J_Stalled'] dictList = [] result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems) if not result['OK']: self._prettyPrint(result) return result if result['Value']['TotalRecords'] > 0: try: paramNames = result['Value']['ParameterNames'] for paramValues in result['Value']['Records']: paramShowValues = map( lambda pname: paramValues[paramNames.index(pname)], paramShowNames) showDict = dict(zip(paramShowNamesShort, paramShowValues)) dictList.append(showDict) except Exception, x: print 'Exception %s ' % str(x) if not len(dictList) > 0: gLogger.error( 'No found transformations satisfying input condition') return S_ERROR( 'No found transformations satisfying input condition') else: print self._printFormattedDictList(dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0]) return S_OK(dictList)
class TransformationAgent(AgentModule): def initialize(self): """ standard init """ self.pluginLocation = self.am_getOption( 'PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin') self.checkCatalog = self.am_getOption('CheckCatalog', 'yes') self.transformationStatus = self.am_getOption( 'transformationStatus', ['Active', 'Completing', 'Flush']) self.maxFiles = self.am_getOption('MaxFiles', 5000) self.am_setOption('shifterProxy', 'ProductionManager') self.transDB = TransformationClient('TransformationDB') self.rm = ReplicaManager() self.unusedFiles = {} return S_OK() def execute(self): """ get and process the transformations to be processed """ res = self.getTransformations() if not res['OK']: gLogger.info("execute: Failed to obtain transformations: %s" % res['Message']) return S_OK() # Process the transformations for transDict in res['Value']: transID = long(transDict['TransformationID']) gLogger.info("execute: Processing transformation %s." % transID) startTime = time.time() res = self.processTransformation(transDict) if not res['OK']: gLogger.info("execute: Failed to process transformation: %s" % res['Message']) else: gLogger.info( "execute: Processed transformation in %.1f seconds" % (time.time() - startTime)) return S_OK() def getTransformations(self): """ Obtain the transformations to be executed """ transName = self.am_getOption('Transformation', 'All') if transName == 'All': gLogger.info( "getTransformations: Initializing general purpose agent.") res = self.transDB.getTransformations( {'Status': self.transformationStatus}, extraParams=True) if not res['OK']: gLogger.error( "getTransformations: Failed to get transformations: %s" % res['Message']) return res transformations = res['Value'] gLogger.info( "getTransformations: Obtained %d transformations to process" % len(transformations)) else: gLogger.info( "getTransformations: Initializing for transformation %s." % transName) res = self.transDB.getTransformation(transName, extraParams=True) if not res['OK']: gLogger.error( "getTransformations: Failed to get transformation: %s." % res['Message']) return res transformations = [res['Value']] return S_OK(transformations) def processTransformation(self, transDict): transID = transDict['TransformationID'] # First get the LFNs associated to the transformation res = self.transDB.getTransformationFiles(condDict={ 'TransformationID': transID, 'Status': 'Unused' }) if not res['OK']: gLogger.error( "processTransformation: Failed to obtain input data: %s." % res['Message']) return res transFiles = res['Value'] lfns = res['LFNs'] if not lfns: gLogger.info( "processTransformation: No 'Unused' files found for transformation." ) if transDict['Status'] == 'Flush': res = self.transDB.setTransformationParameter( transID, 'Status', 'Active') if not res['OK']: gLogger.error( "processTransformation: Failed to update transformation status to 'Active': %s." % res['Message']) else: gLogger.info( "processTransformation: Updated transformation status to 'Active'." ) return S_OK() #Check if something new happened if len(lfns) == self.unusedFiles.get( transID, 0) and transDict['Status'] != 'Flush': gLogger.info( "processTransformation: No new 'Unused' files found for transformation." 
) return S_OK() replicateOrRemove = transDict['Type'].lower() in [ "replication", "removal" ] # Limit the number of LFNs to be considered for replication or removal as they are treated individually if replicateOrRemove: lfns = lfns[0:self.maxFiles - 1] unusedFiles = len(lfns) # Check the data is available with replicas res = self.__getDataReplicas(transID, lfns, active=not replicateOrRemove) if not res['OK']: gLogger.error( "processTransformation: Failed to get data replicas: %s" % res['Message']) return res dataReplicas = res['Value'] # Get the plug-in type and create the plug-in object plugin = 'Standard' if transDict.has_key('Plugin') and transDict['Plugin']: plugin = transDict['Plugin'] gLogger.info( "processTransformation: Processing transformation with '%s' plug-in." % plugin) res = self.__generatePluginObject(plugin) if not res['OK']: return res oPlugin = res['Value'] # Get the plug-in and set the required params oPlugin.setParameters(transDict) oPlugin.setInputData(dataReplicas) oPlugin.setTransformationFiles(transFiles) res = oPlugin.generateTasks() if not res['OK']: gLogger.error( "processTransformation: Failed to generate tasks for transformation: %s" % res['Message']) return res tasks = res['Value'] # Create the tasks allCreated = True created = 0 for se, lfns in tasks: res = self.transDB.addTaskForTransformation(transID, lfns, se) if not res['OK']: gLogger.error( "processTransformation: Failed to add task generated by plug-in: %s." % res['Message']) allCreated = False else: created += 1 unusedFiles -= len(lfns) if created: gLogger.info( "processTransformation: Successfully created %d tasks for transformation." % created) self.unusedFiles[transID] = unusedFiles # If this production is to Flush if transDict['Status'] == 'Flush' and allCreated: res = self.transDB.setTransformationParameter( transID, 'Status', 'Active') if not res['OK']: gLogger.error( "processTransformation: Failed to update transformation status to 'Active': %s." % res['Message']) else: gLogger.info( "processTransformation: Updated transformation status to 'Active'." ) return S_OK() ###################################################################### # # Internal methods used by the agent # def __generatePluginObject(self, plugin): """ This simply instantiates the TransformationPlugin class with the relevant plugin name """ try: plugModule = __import__(self.pluginLocation, globals(), locals(), ['TransformationPlugin']) except ImportError, e: gLogger.exception( "__generatePluginObject: Failed to import 'TransformationPlugin' %s: %s" % (plugin, e)) return S_ERROR() try: plugin_o = getattr(plugModule, 'TransformationPlugin')( '%s' % plugin, transClient=self.transDB, replicaManager=self.rm) return S_OK(plugin_o) except AttributeError, e: gLogger.exception( "__generatePluginObject: Failed to create %s(): %s." % (plugin, e)) return S_ERROR()
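The agent above only relies on a small plugin surface: construction with the plugin name, three setters, and generateTasks() returning (targetSE, lfns) pairs wrapped in S_OK. A minimal illustrative stand-in of that contract follows; it is not the real DIRAC TransformationPlugin, and its grouping logic is deliberately naive.

# Illustrative stand-in showing the interface TransformationAgent exercises above.
# NOT the real TransformationPlugin; names and grouping logic are assumptions.
from DIRAC import S_OK

class ToyTransformationPlugin(object):
    """Toy plugin honouring the calls made by TransformationAgent.processTransformation."""

    def __init__(self, plugin, transClient=None, replicaManager=None):
        self.plugin = plugin
        self.params = {}
        self.data = {}        # replica map {lfn: {se: pfn}}, as passed by the agent
        self.files = []       # transformation file records

    def setParameters(self, params):
        self.params = params

    def setInputData(self, data):
        self.data = data

    def setTransformationFiles(self, files):
        self.files = files

    def generateTasks(self):
        """Group LFNs into chunks of GroupSize, each targeted at the first SE seen."""
        groupSize = int(self.params.get('GroupSize', 1))
        lfns = sorted(self.data)
        tasks = []
        for i in range(0, len(lfns), groupSize):
            chunk = lfns[i:i + groupSize]
            # pick the SE of the first replica of the first file in the chunk
            se = sorted(self.data[chunk[0]])[0] if self.data[chunk[0]] else 'Unknown'
            tasks.append((se, chunk))
        return S_OK(tasks)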
class Transformation( API ): ############################################################################# def __init__( self, transID = 0, transClient = None ): """ c'tor """ super( Transformation, self ).__init__() self.paramTypes = { 'TransformationID' : [types.IntType, types.LongType], 'TransformationName' : types.StringTypes, 'Status' : types.StringTypes, 'Description' : types.StringTypes, 'LongDescription' : types.StringTypes, 'Type' : types.StringTypes, 'Plugin' : types.StringTypes, 'AgentType' : types.StringTypes, 'FileMask' : types.StringTypes, 'TransformationGroup' : types.StringTypes, 'GroupSize' : [types.IntType, types.LongType, types.FloatType], 'InheritedFrom' : [types.IntType, types.LongType], 'Body' : types.StringTypes, 'MaxNumberOfTasks' : [types.IntType, types.LongType], 'EventsPerTask' : [types.IntType, types.LongType]} self.paramValues = { 'TransformationID' : 0, 'TransformationName' : '', 'Status' : 'New', 'Description' : '', 'LongDescription' : '', 'Type' : '', 'Plugin' : 'Standard', 'AgentType' : 'Manual', 'FileMask' : '', 'TransformationGroup' : 'General', 'GroupSize' : 1, 'InheritedFrom' : 0, 'Body' : '', 'MaxNumberOfTasks' : 0, 'EventsPerTask' : 0} self.ops = Operations() self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins', ['Broadcast', 'Standard', 'BySize', 'ByShare'] ) if not transClient: self.transClient = TransformationClient() else: self.transClient = transClient self.serverURL = self.transClient.getServer() self.exists = False if transID: self.paramValues['TransformationID'] = transID res = self.getTransformation() if res['OK']: self.exists = True elif res['Message'] == 'Transformation does not exist': raise AttributeError( 'TransformationID %d does not exist' % transID ) else: self.paramValues['TransformationID'] = 0 gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID, self.transClient.serverURL ) ) def setServer( self, server ): self.serverURL = server self.transClient.setServer( self.serverURL ) def getServer( self ): return self.serverURL def reset( self, transID = 0 ): self.__init__( transID ) self.transClient.setServer( self.serverURL ) return S_OK() def setTargetSE( self, seList ): return self.__setSE( 'TargetSE', seList ) def setSourceSE( self, seList ): return self.__setSE( 'SourceSE', seList ) def __setSE( self, seParam, seList ): if isinstance( seList, basestring ): try: seList = eval( seList ) except: seList = seList.split( ',' ) elif isinstance( seList, ( list, dict, tuple ) ): seList = list( seList ) else: return S_ERROR( "Bad argument type" ) res = self.__checkSEs( seList ) if not res['OK']: return res self.item_called = seParam return self.__setParam( seList ) def __getattr__( self, name ): if name.find( 'get' ) == 0: item = name[3:] self.item_called = item return self.__getParam if name.find( 'set' ) == 0: item = name[3:] self.item_called = item return self.__setParam raise AttributeError( name ) def __getParam( self ): if self.item_called == 'Available': return S_OK( self.paramTypes.keys() ) if self.item_called == 'Parameters': return S_OK( self.paramValues ) if self.item_called in self.paramValues: return S_OK( self.paramValues[self.item_called] ) raise AttributeError( "Unknown parameter for transformation: %s" % self.item_called ) def __setParam( self, value ): change = False if self.item_called in self.paramTypes: if self.paramValues[self.item_called] != value: if type( value ) in self.paramTypes[self.item_called]: change = True else: raise TypeError( "%s %s %s expected one of %s" % 
( self.item_called, value, type( value ), self.paramTypes[self.item_called] ) ) else: if self.item_called not in self.paramValues: change = True else: if self.paramValues[self.item_called] != value: change = True if not change: gLogger.verbose( "No change of parameter %s required" % self.item_called ) else: gLogger.verbose( "Parameter %s to be changed" % self.item_called ) transID = self.paramValues['TransformationID'] if self.exists and transID: res = self.transClient.setTransformationParameter( transID, self.item_called, value ) if not res['OK']: return res self.paramValues[self.item_called] = value return S_OK() def getTransformation( self, printOutput = False ): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() res = self.transClient.getTransformation( transID, extraParams = True ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res transParams = res['Value'] for paramName, paramValue in transParams.items(): setter = None setterName = "set%s" % paramName if hasattr( self, setterName ) and callable( getattr( self, setterName ) ): setter = getattr( self, setterName ) if not setter: gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName ) continue setter( paramValue ) if printOutput: gLogger.info( "No printing available yet" ) return S_OK( transParams ) def getTransformationLogging( self, printOutput = False ): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() res = self.transClient.getTransformationLogging( transID ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res loggingList = res['Value'] if printOutput: self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' ) return S_OK( loggingList ) def extendTransformation( self, nTasks, printOutput = False ): return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput ) def cleanTransformation( self, printOutput = False ): res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput ) if res['OK']: self.paramValues['Status'] = 'Cleaned' return res def deleteTransformation( self, printOutput = False ): res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput ) if res['OK']: self.reset() return res def addFilesToTransformation( self, lfns, printOutput = False ): return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput ) def setFileStatusForTransformation( self, status, lfns, printOutput = False ): return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput ) def getTransformationTaskStats( self, printOutput = False ): return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput ) def getTransformationStats( self, printOutput = False ): return self.__executeOperation( 'getTransformationStats', printOutput = printOutput ) def deleteTasks( self, taskMin, taskMax, printOutput = False ): return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput ) def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ): return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput ) def setTaskStatus( self, taskID, status, printOutput = False ): return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = 
printOutput ) def __executeOperation( self, operation, *parms, **kwds ): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() printOutput = kwds.pop( 'printOutput' ) fcn = None if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ): fcn = getattr( self.transClient, operation ) if not fcn: return S_ERROR( "Unable to invoke %s, it isn't a member function of TransformationClient" % operation ) res = fcn( transID, *parms, **kwds ) if printOutput: self._prettyPrint( res ) return res def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate'], orderBy = 'FileID', printOutput = False ): condDict = {'TransformationID':self.paramValues['TransformationID']} if fileStatus: condDict['Status'] = fileStatus if lfns: condDict['LFN'] = lfns res = self.transClient.getTransformationFiles( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) ) elif not res['Value']: gLogger.info( "No files found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy ) return res def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID', 'TargetSE', 'CreationTime', 'LastUpdateTime'], orderBy = 'TaskID', printOutput = False ): condDict = {'TransformationID':self.paramValues['TransformationID']} if taskStatus: condDict['ExternalStatus'] = taskStatus if taskIDs: condDict['TaskID'] = taskIDs res = self.transClient.getTransformationTasks( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) ) elif not res['Value']: gLogger.info( "No tasks found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy ) return res ############################################################################# def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate'], orderBy = 'TransformationID', printOutput = False ): condDict = {} if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) ) elif not res['Value']: gLogger.info( "No transformations found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy ) return res ############################################################################# def getAuthorDNfromProxy( self ): """ gets the AuthorDN and username of the transformation from the uploaded proxy """ username = "" author = "" res = getProxyInfo() if res['OK']: author = res['Value']['identity'] username = res['Value']['username'] else: gLogger.error( "Unable to get uploaded proxy info: %s" % res['Message'] ) return S_ERROR( res['Message'] ) res = {'username' : username, 'authorDN' : author } return S_OK( res ) 
############################################################################# def getTransformationsByUser( self, authorDN = "", userName = "", transID = [], transStatus = [], outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate', 'AuthorDN'], orderBy = 'TransformationID', printOutput = False ): condDict = {} if authorDN == "": res = self.getAuthorDNfromProxy() if not res['OK']: gLogger.error( res['Message'] ) return S_ERROR( res['Message'] ) else: foundUserName = res['Value']['username'] foundAuthor = res['Value']['authorDN'] # If the username who created the uploaded proxy differs from the provided username, report an error and exit if not ( userName == "" or userName == foundUserName ): gLogger.error("Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" %(userName, foundUserName)) return S_ERROR("Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" %(userName, foundUserName)) userName = foundUserName authorDN = foundAuthor gLogger.info("Will list transformations created by user '%s' with status '%s'" %(userName, ', '.join( transStatus ))) else: gLogger.info("Will list transformations created by '%s' with status '%s'" %(authorDN, ', '.join( transStatus ))) condDict['AuthorDN'] = authorDN if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % ' '.join( res['ParameterNames'] ) ) elif not res['Value']: gLogger.info( "No transformations found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy ) return res ############################################################################# def getSummaryTransformations( self, transID = [] ): """Show the summary for a list of Transformations Fields starting with 'F' ('J') refer to files (jobs). 'Proc.' stands for processed. """ condDict = { 'TransformationID' : transID } orderby = [] start = 0 maxitems = len(transID) paramShowNames = ['TransformationID','Type','Status','Files_Total','Files_PercentProcessed',\ 'Files_Processed','Files_Unused','Jobs_TotalCreated','Jobs_Waiting',\ 'Jobs_Running','Jobs_Done','Jobs_Failed','Jobs_Stalled'] # Below, the header used for each field in the printing: short to fit in one line paramShowNamesShort = ['TransID','Type','Status','F_Total','F_Proc.(%)','F_Proc.',\ 'F_Unused','J_Created','J_Wait','J_Run','J_Done','J_Fail','J_Stalled'] dictList = [] result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems ) if not result['OK']: self._prettyPrint( result ) return result if result['Value']['TotalRecords'] > 0: try: paramNames = result['Value']['ParameterNames'] for paramValues in result['Value']['Records']: paramShowValues = map(lambda pname: paramValues[ paramNames.index(pname) ], paramShowNames) showDict = dict(zip( paramShowNamesShort, paramShowValues )) dictList.append( showDict ) except Exception, x: print 'Exception %s' % str(x) if not len(dictList) > 0: gLogger.error( 'No transformations found satisfying the input condition' ) return S_ERROR( 'No transformations found satisfying the input condition' ) else: print self._printFormattedDictList( dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0] ) return S_OK( dictList )
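#############################################################################
# Illustrative usage of the Transformation wrapper defined above. This is a
# minimal, hypothetical sketch and not part of the original module: the
# transformation ID 1234 is made up, the import path is assumed to be the
# usual client location, and a configured DIRAC client installation with a
# reachable TransformationManager service is taken for granted. Only methods
# shown above (getTransformation, getTransformationFiles, extendTransformation,
# getTransformationLogging) are exercised.
def _exampleInspectAndExtend():
  """Attach to an existing transformation, print its state and add 10 tasks."""
  from DIRAC.Core.Base import Script
  Script.parseCommandLine()
  from DIRAC import gLogger, exit as dexit
  from DIRAC.TransformationSystem.Client.Transformation import Transformation
  try:
    trans = Transformation( 1234 )  # hypothetical existing TransformationID
  except AttributeError, e:
    gLogger.error( "Transformation not found: %s" % e )
    dexit( 1 )
  res = trans.getTransformation( printOutput = True )
  if not res['OK']:
    dexit( 1 )
  # List the 'Unused' files still waiting to be grouped into tasks
  trans.getTransformationFiles( fileStatus = ['Unused'], printOutput = True )
  # Allow 10 more tasks to be created (only meaningful for some plugins)
  res = trans.extendTransformation( 10, printOutput = True )
  if not res['OK']:
    gLogger.error( "Failed to extend transformation", res['Message'] )
    dexit( 1 )
  trans.getTransformationLogging( printOutput = True )
  dexit( 0 )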
class TransformationCleaningAgent( AgentModule ): """ .. class:: TransformationCleaningAgent :param DataManger dm: DataManager instance :param TransfromationClient transClient: TransfromationClient instance :param FileCatalogClient metadataClient: FileCatalogClient instance """ def __init__( self, *args, **kwargs ): """ c'tor """ AgentModule.__init__( self, *args, **kwargs ) # # data manager self.dm = None # # transformation client self.transClient = None # # wms client self.wmsClient = None # # request client self.reqClient = None # # file catalog client self.metadataClient = None # # transformations types self.transformationTypes = None # # directory locations self.directoryLocations = None # # transformation metadata self.transfidmeta = None # # archive periof in days self.archiveAfter = None # # active SEs self.activeStorages = None # # transformation log SEs self.logSE = None # # enable/disable execution self.enableFlag = None def initialize( self ): """ agent initialisation reading and setting confing opts :param self: self reference """ # # shifter proxy self.am_setOption( 'shifterProxy', 'DataManager' ) # # transformations types self.dataProcTTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] ) self.dataManipTTypes = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] ) agentTSTypes = self.am_getOption( 'TransformationTypes', [] ) if agentTSTypes: self.transformationTypes = sorted( agentTSTypes ) else: self.transformationTypes = sorted( self.dataProcTTypes + self.dataManipTTypes ) self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) ) # # directory locations self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB', 'MetadataCatalog' ] ) ) self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) ) # # transformation metadata self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" ) self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta ) # # archive periof in days self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 ) # days self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter ) # # active SEs self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) ) self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) ) # # transformation log SEs self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' ) self.log.info( "Will remove logs found on storage element: %s" % self.logSE ) # # enable/disable execution, should be using CS option Status?? with default value as 'Active'?? 
self.enableFlag = self.am_getOption( 'EnableFlag', 'True' ) # # data manager # self.dm = DataManager() # # transformation client self.transClient = TransformationClient() # # wms client self.wmsClient = WMSClient() # # request client self.reqClient = ReqClient() # # file catalog client self.metadataClient = FileCatalogClient() return S_OK() ############################################################################# def execute( self ): """ execution in one agent's cycle :param self: self reference """ self.enableFlag = self.am_getOption( 'EnableFlag', 'True' ) if not self.enableFlag == 'True': self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' ) return S_OK( 'Disabled via CS flag' ) # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files res = self.transClient.getTransformations( { 'Status' : 'Cleaning', 'Type' : self.transformationTypes } ) if res['OK']: for transDict in res['Value']: # # if transformation is of type `Replication` or `Removal`, there is nothing to clean. # # We just archive if transDict[ 'Type' ] in self.dataManipTTypes: res = self.archiveTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) else: res = self.cleanTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles', 'Type' : self.transformationTypes} ) if res['OK']: for transDict in res['Value']: res = self.removeTransformationOutput( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) # # Obtain the transformations in Completed status and archive if inactive for X days olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter ) res = self.transClient.getTransformations( { 'Status' : 'Completed', 'Type' : self.transformationTypes }, older = olderThanTime, timeStamp = 'LastUpdate' ) if res['OK']: for transDict in res['Value']: res = self.archiveTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) else: self.log.error( "Could not get the transformations" ) return S_OK() ############################################################################# # # Get the transformation directories for checking # def getTransformationDirectories( self, transID ): """ get the directories for the supplied transformation from the transformation system :param self: self reference :param int transID: transformation ID """ directories = [] if 'TransformationDB' in self.directoryLocations: res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] ) if not res['OK']: self.log.error( "Failed to obtain transformation directories", res['Message'] ) return res transDirectories = res['Value'].splitlines() directories = self._addDirs( transID, transDirectories, directories ) if 'MetadataCatalog' in self.directoryLocations: res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} ) if not res['OK']: self.log.error( "Failed to obtain metadata catalog directories", 
res['Message'] ) return res transDirectories = res['Value'] directories = self._addDirs( transID, transDirectories, directories ) if not directories: self.log.info( "No output directories found" ) directories = sorted( directories ) return S_OK( directories ) # FIXME If a classmethod, should it not have cls instead of self? @classmethod def _addDirs( self, transID, newDirs, existingDirs ): """ append uniqe :newDirs: list to :existingDirs: list :param self: self reference :param int transID: transformationID :param list newDirs: src list of paths :param list existingDirs: dest list of paths """ for folder in newDirs: transStr = str( transID ).zfill( 8 ) if re.search( transStr, str( folder ) ): if not folder in existingDirs: existingDirs.append( folder ) return existingDirs ############################################################################# # # These are the methods for performing the cleaning of catalogs and storage # def cleanStorageContents( self, directory ): """ delete lfn dir from all active SE :param self: self reference :param sre directory: folder name """ for storageElement in self.activeStorages: res = self.__removeStorageDirectory( directory, storageElement ) if not res['OK']: return res return S_OK() def __removeStorageDirectory( self, directory, storageElement ): """ wipe out all contents from :directory: at :storageElement: :param self: self reference :param str directory: path :param str storageElement: SE name """ self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) ) se = StorageElement( storageElement ) res = se.getPfnForLfn( [directory] ) if not res['OK']: self.log.error( "Failed to get PFN for directory", res['Message'] ) return res if directory in res['Value']['Failed']: self.log.verbose( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, res['Value']['Failed'][directory] ) ) return S_ERROR( 'Failed to obtain directory PFN from LFNs' ) storageDirectory = res['Value']['Successful'][directory] res = returnSingleResult( se.exists( storageDirectory ) ) if not res['OK']: self.log.error( "Failed to obtain existance of directory", res['Message'] ) return res exists = res['Value'] if not exists: self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) ) return S_OK() res = returnSingleResult( se.removeDirectory( storageDirectory, recursive = True ) ) if not res['OK']: self.log.error( "Failed to remove storage directory", res['Message'] ) return res self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'], directory, storageElement ) ) return S_OK() def cleanCatalogContents( self, directory ): """ wipe out everything from catalog under folder :directory: :param self: self reference :params str directory: folder name """ res = self.__getCatalogDirectoryContents( [directory] ) if not res['OK']: return res filesFound = res['Value'] if not filesFound: self.log.info( "No files are registered in the catalog directory %s" % directory ) return S_OK() self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) ) # Executing with shifter proxy gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' ) res = DataManager().removeFile( filesFound, force = True ) gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' ) if not res['OK']: return res realFailure = False for lfn, reason in res['Value']['Failed'].items(): if "File does not exist" in str( reason ): 
self.log.warn( "File %s not found in some catalog: " % ( lfn ) ) else: self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) ) realFailure = True if realFailure: return S_ERROR( "Failed to remove all files found in the catalog" ) return S_OK() def __getCatalogDirectoryContents( self, directories ): """ get catalog contents under paths :directories: :param self: self reference :param list directories: list of paths in catalog """ self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) ) for directory in directories: self.log.info( directory ) activeDirs = directories allFiles = {} fc = FileCatalog() while len( activeDirs ) > 0: currentDir = activeDirs[0] res = returnSingleResult( fc.listDirectory( currentDir ) ) activeDirs.remove( currentDir ) if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ): self.log.info( "The supplied directory %s does not exist" % currentDir ) elif not res['OK']: if "No such file or directory" in res['Message']: self.log.info( "%s: %s" % ( currentDir, res['Message'] ) ) else: self.log.error( "Failed to get directory %s content: %s" % ( currentDir, res['Message'] ) ) else: dirContents = res['Value'] activeDirs.extend( dirContents['SubDirs'] ) allFiles.update( dirContents['Files'] ) self.log.info( "Found %d files" % len( allFiles ) ) return S_OK( allFiles.keys() ) def cleanTransformationLogFiles( self, directory ): """ clean up transformation logs from directory :directory: :param self: self reference :param str directory: folder name """ self.log.info( "Removing log files found in the directory %s" % directory ) res = returnSingleResult( StorageElement( self.logSE ).removeDirectory( directory ) ) if not res['OK']: self.log.error( "Failed to remove log files", res['Message'] ) return res self.log.info( "Successfully removed transformation log directory" ) return S_OK() ############################################################################# # # These are the functional methods for archiving and cleaning transformations # def removeTransformationOutput( self, transID ): """ This just removes any mention of the output data from the catalog and storage """ self.log.info( "Removing output data for transformation %s" % transID ) res = self.getTransformationDirectories( transID ) if not res['OK']: self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) ) return S_OK() directories = res['Value'] for directory in directories: if not re.search( '/LOG/', directory ): res = self.cleanCatalogContents( directory ) if not res['OK']: return res res = self.cleanStorageContents( directory ) if not res['OK']: return res self.log.info( "Removed directories in the catalog and storage for transformation" ) # Clean ALL the possible remnants found in the metadata catalog res = self.cleanMetadataCatalogFiles( transID ) if not res['OK']: return res self.log.info( "Successfully removed output of transformation %d" % transID ) # Change the status of the transformation to RemovedFiles res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) ) return S_OK() def archiveTransformation( self, transID ): """ This just removes job from the jobDB and the transformation DB :param self: self reference 
:param int transID: transformation ID """ self.log.info( "Archiving transformation %s" % transID ) # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks( transID ) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation( transID ) if not res['OK']: return res self.log.info( "Successfully archived transformation %d" % transID ) # Change the status of the transformation to archived res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of transformation %s to Archived" % ( transID ) ) return S_OK() def cleanTransformation( self, transID ): """ This removes what was produced by the supplied transformation, leaving only some info and log in the transformation DB. """ self.log.info( "Cleaning transformation %s" % transID ) res = self.getTransformationDirectories( transID ) if not res['OK']: self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) ) return S_OK() directories = res['Value'] # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks( transID ) if not res['OK']: return res # Clean the log files for the jobs for directory in directories: if re.search( '/LOG/', directory ): res = self.cleanTransformationLogFiles( directory ) if not res['OK']: return res res = self.cleanCatalogContents( directory ) if not res['OK']: return res res = self.cleanStorageContents( directory ) if not res['OK']: return res # Clean ALL the possible remnants found in the BK res = self.cleanMetadataCatalogFiles( transID ) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation( transID ) if not res['OK']: return res self.log.info( "Successfully cleaned transformation %d" % transID ) res = self.transClient.setTransformationParameter( transID, 'Status', 'Cleaned' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to Cleaned" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of transformation %s to Cleaned" % ( transID ) ) return S_OK() def cleanMetadataCatalogFiles( self, transID ): """ wipe out files from catalog """ res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } ) if not res['OK']: return res fileToRemove = res['Value'] if not fileToRemove: self.log.info( 'No files found for transID %s' % transID ) return S_OK() # Executing with shifter proxy gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' ) res = DataManager().removeFile( fileToRemove, force = True ) gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' ) if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) ) if res['Value']['Failed']: return S_ERROR( "Failed to remove all files found in the metadata catalog" ) self.log.info( "Successfully removed all files found in the BK" ) return S_OK() ############################################################################# # # These are the methods for removing the jobs from the WMS and transformation DB # def cleanTransformationTasks( self, transID ): """ clean tasks from WMS, or 
from the RMS if it is a DataManipulation transformation """ res = self.__getTransformationExternalIDs( transID ) if not res['OK']: return res externalIDs = res['Value'] if externalIDs: res = self.transClient.getTransformationParameters( transID, ['Type'] ) if not res['OK']: self.log.error( "Failed to determine transformation type" ) return res transType = res['Value'] if transType in self.dataProcTTypes: res = self.__removeWMSTasks( externalIDs ) else: res = self.__removeRequests( externalIDs ) if not res['OK']: return res return S_OK() def __getTransformationExternalIDs( self, transID ): """ collect all ExternalIDs for transformation :transID: :param self: self reference :param int transID: transforamtion ID """ res = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } ) if not res['OK']: self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] ) return res externalIDs = [ taskDict['ExternalID'] for taskDict in res["Value"] ] self.log.info( "Found %d tasks for transformation" % len( externalIDs ) ) return S_OK( externalIDs ) def __removeRequests( self, requestIDs ): """ This will remove requests from the (new) RMS system - #FIXME: if the old system is still installed, it won't remove anything!!! (we don't want to risk removing from the new RMS what is instead in the old) """ # FIXME: checking if the old system is still installed! from DIRAC.ConfigurationSystem.Client import PathFinder if PathFinder.getServiceURL( "RequestManagement/RequestManager" ): self.log.warn( "NOT removing requests!!" ) return S_OK() rIDs = [ int( long( j ) ) for j in requestIDs if long( j ) ] for requestName in rIDs: self.reqClient.deleteRequest( requestName ) return S_OK() def __removeWMSTasks( self, transJobIDs ): """ wipe out jobs and their requests from the system TODO: should check request status, maybe FTS files as well ??? :param self: self reference :param list trasnJobIDs: job IDs """ # Prevent 0 job IDs jobIDs = [ int( j ) for j in transJobIDs if int( j ) ] allRemove = True for jobList in breakListIntoChunks( jobIDs, 500 ): res = self.wmsClient.killJob( jobList ) if res['OK']: self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) ) elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ): self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) ) elif "NonauthorizedJobIDs" in res: self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) ) allRemove = False elif "FailedJobIDs" in res: self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) ) allRemove = False res = self.wmsClient.deleteJob( jobList ) if res['OK']: self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) ) elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ): self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) ) elif "NonauthorizedJobIDs" in res: self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) ) allRemove = False elif "FailedJobIDs" in res: self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) ) allRemove = False if not allRemove: return S_ERROR( "Failed to remove all remnants from WMS" ) self.log.info( "Successfully removed all tasks from the WMS" ) if not jobIDs: self.log.info( "JobIDs not present, unable to remove asociated requests." 
) return S_OK() failed = 0 # FIXME: double request client: old/new -> only the new will survive sooner or later # this is the old try: res = RequestClient().getRequestForJobs( jobIDs ) if not res['OK']: self.log.error( "Failed to get requestID for jobs.", res['Message'] ) return res failoverRequests = res['Value'] self.log.info( "Found %d jobs with associated failover requests (in the old RMS)" % len( failoverRequests ) ) if not failoverRequests: return S_OK() for jobID, requestName in failoverRequests.items(): # Put this check just in case, tasks must have associated jobs if jobID == 0 or jobID == '0': continue res = RequestClient().deleteRequest( requestName ) if not res['OK']: self.log.error( "Failed to remove request from RequestDB", res['Message'] ) failed += 1 else: self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) ) except RuntimeError: failoverRequests = {} pass # FIXME: and this is the new res = self.reqClient.getRequestNamesForJobs( jobIDs ) if not res['OK']: self.log.error( "Failed to get requestID for jobs.", res['Message'] ) return res failoverRequests.update( res['Value']['Successful'] ) if not failoverRequests: return S_OK() for jobID, requestName in res['Value']['Successful'].items(): # Put this check just in case, tasks must have associated jobs if jobID == 0 or jobID == '0': continue res = self.reqClient.deleteRequest( requestName ) if not res['OK']: self.log.error( "Failed to remove request from RequestDB", res['Message'] ) failed += 1 else: self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) ) if failed: self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) ) self.log.info( "Failed to remove %s requests" % failed ) return S_ERROR( "Failed to remove all the request from RequestDB" ) self.log.info( "Successfully removed all the associated failover requests" ) return S_OK()
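#############################################################################
# The TransformationCleaningAgent above only acts on transformations whose
# Status has been set to 'Cleaning', 'RemovingFiles' or, after the archive
# period, 'Completed'. A minimal, hypothetical sketch of the driving side,
# i.e. how a transformation gets flagged so the agent picks it up on its next
# cycle; the transformation ID 1234 is made up and a configured DIRAC client
# is assumed.
def _exampleFlagForCleaning( transID = 1234, fullClean = True ):
  """Flag a finished transformation for the TransformationCleaningAgent."""
  from DIRAC import gLogger
  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
  tc = TransformationClient()
  # 'Cleaning' wipes tasks, directories and logs as implemented above;
  # 'RemovingFiles' only removes the output data.
  newStatus = 'Cleaning' if fullClean else 'RemovingFiles'
  res = tc.setTransformationParameter( transID, 'Status', newStatus )
  if not res['OK']:
    gLogger.error( "Failed to update transformation status", res['Message'] )
    return res
  gLogger.notice( "Transformation %s set to %s" % ( transID, newStatus ) )
  return res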
class TransformationCleaningAgent(AgentModule): ############################################################################# def initialize(self): """Sets defaults """ self.replicaManager = ReplicaManager() self.transClient = TransformationClient() self.wmsClient = WMSClient() self.requestClient = RequestClient() self.metadataClient = FileCatalogClient() self.storageUsageClient = StorageUsageClient() # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption('shifterProxy', 'DataManager') self.transformationTypes = sortList( self.am_getOption('TransformationTypes', [ 'MCSimulation', 'DataReconstruction', 'DataStripping', 'MCStripping', 'Merge', 'Replication' ])) gLogger.info("Will consider the following transformation types: %s" % str(self.transformationTypes)) self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', ['TransformationDB', 'StorageUsage', 'MetadataCatalog'])) gLogger.info( "Will search for directories in the following locations: %s" % str(self.directoryLocations)) self.transfidmeta = self.am_getOption('TransfIDMeta', "TransformationID") gLogger.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta) self.archiveAfter = self.am_getOption('ArchiveAfter', 7) # days gLogger.info("Will archive Completed transformations after %d days" % self.archiveAfter) self.activeStorages = sortList(self.am_getOption('ActiveSEs', [])) gLogger.info("Will check the following storage elements: %s" % str(self.activeStorages)) self.logSE = self.am_getOption('TransformationLogSE', 'LogSE') gLogger.info("Will remove logs found on storage element: %s" % self.logSE) return S_OK() ############################################################################# def execute(self): """ The TransformationCleaningAgent execution method. 
""" self.enableFlag = self.am_getOption('EnableFlag', 'True') if not self.enableFlag == 'True': self.log.info( 'TransformationCleaningAgent is disabled by configuration option %s/EnableFlag' % (self.section)) return S_OK('Disabled via CS flag') # Obtain the transformations in Cleaning status and remove any mention of the jobs/files res = self.transClient.getTransformations({ 'Status': 'Cleaning', 'Type': self.transformationTypes }) if res['OK']: for transDict in res['Value']: self.cleanTransformation(transDict['TransformationID']) # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files res = self.transClient.getTransformations({ 'Status': 'RemovingFiles', 'Type': self.transformationTypes }) if res['OK']: for transDict in res['Value']: self.removeTransformationOutput(transDict['TransformationID']) # Obtain the transformations in Completed status and archive if inactive for X days olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter) res = self.transClient.getTransformations( { 'Status': 'Completed', 'Type': self.transformationTypes }, older=olderThanTime) if res['OK']: for transDict in res['Value']: self.archiveTransformation(transDict['TransformationID']) return S_OK() ############################################################################# # # Get the transformation directories for checking # def getTransformationDirectories(self, transID): """ Get the directories for the supplied transformation from the transformation system """ directories = [] if 'TransformationDB' in self.directoryLocations: res = self.transClient.getTransformationParameters( transID, ['OutputDirectories']) if not res['OK']: gLogger.error("Failed to obtain transformation directories", res['Message']) return res transDirectories = res['Value'].splitlines() directories = self.__addDirs(transID, transDirectories, directories) if 'StorageUsage' in self.directoryLocations: res = self.storageUsageClient.getStorageDirectories( '', '', transID, []) if not res['OK']: gLogger.error("Failed to obtain storage usage directories", res['Message']) return res transDirectories = res['Value'] directories = self.__addDirs(transID, transDirectories, directories) if 'MetadataCatalog' in self.directoryLocations: res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta: transID}) if not res['OK']: gLogger.error("Failed to obtain metadata catalog directories", res['Message']) return res transDirectories = res['Value'] directories = self.__addDirs(transID, transDirectories, directories) if not directories: gLogger.info("No output directories found") directories = sortList(directories) return S_OK(directories) def __addDirs(self, transID, newDirs, existingDirs): for dir in newDirs: transStr = str(transID).zfill(8) if re.search(transStr, dir): if not dir in existingDirs: existingDirs.append(dir) return existingDirs ############################################################################# # # These are the methods for performing the cleaning of catalogs and storage # def cleanStorageContents(self, directory): for storageElement in self.activeStorages: res = self.__removeStorageDirectory(directory, storageElement) if not res['OK']: return res return S_OK() def __removeStorageDirectory(self, directory, storageElement): gLogger.info('Removing the contents of %s at %s' % (directory, storageElement)) res = self.replicaManager.getPfnForLfn([directory], storageElement) if not res['OK']: gLogger.error("Failed to get PFN for directory", res['Message']) return res for 
directory, error in res['Value']['Failed'].items(): gLogger.error('Failed to obtain directory PFN from LFN', '%s %s' % (directory, error)) if res['Value']['Failed']: return S_ERROR('Failed to obtain directory PFN from LFNs') storageDirectory = res['Value']['Successful'].values()[0] res = self.replicaManager.getStorageFileExists(storageDirectory, storageElement, singleFile=True) if not res['OK']: gLogger.error("Failed to obtain existance of directory", res['Message']) return res exists = res['Value'] if not exists: gLogger.info("The directory %s does not exist at %s " % (directory, storageElement)) return S_OK() res = self.replicaManager.removeStorageDirectory(storageDirectory, storageElement, recursive=True, singleDirectory=True) if not res['OK']: gLogger.error("Failed to remove storage directory", res['Message']) return res gLogger.info("Successfully removed %d files from %s at %s" % (res['Value']['FilesRemoved'], directory, storageElement)) return S_OK() def cleanCatalogContents(self, directory): res = self.__getCatalogDirectoryContents([directory]) if not res['OK']: return res filesFound = res['Value'] if not filesFound: return S_OK() gLogger.info( "Attempting to remove %d possible remnants from the catalog and storage" % len(filesFound)) res = self.replicaManager.removeFile(filesFound) if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): gLogger.error("Failed to remove file found in the catalog", "%s %s" % (lfn, reason)) if res['Value']['Failed']: return S_ERROR("Failed to remove all files found in the catalog") return S_OK() def __getCatalogDirectoryContents(self, directories): gLogger.info('Obtaining the catalog contents for %d directories:' % len(directories)) for directory in directories: gLogger.info(directory) activeDirs = directories allFiles = {} while len(activeDirs) > 0: currentDir = activeDirs[0] res = self.replicaManager.getCatalogListDirectory(currentDir, singleFile=True) activeDirs.remove(currentDir) if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist'): gLogger.info("The supplied directory %s does not exist" % currentDir) elif not res['OK']: gLogger.error('Failed to get directory contents', '%s %s' % (currentDir, res['Message'])) else: dirContents = res['Value'] activeDirs.extend(dirContents['SubDirs']) allFiles.update(dirContents['Files']) gLogger.info("Found %d files" % len(allFiles)) return S_OK(allFiles.keys()) def cleanTransformationLogFiles(self, directory): gLogger.info("Removing log files found in the directory %s" % directory) res = self.replicaManager.removeStorageDirectory(directory, self.logSE, singleDirectory=True) if not res['OK']: gLogger.error("Failed to remove log files", res['Message']) return res gLogger.info("Successfully removed transformation log directory") return S_OK() ############################################################################# # # These are the functional methods for archiving and cleaning transformations # def removeTransformationOutput(self, transID): """ This just removes any mention of the output data from the catalog and storage """ gLogger.info("Removing output data for transformation %s" % transID) res = self.getTransformationDirectories(transID) if not res['OK']: gLogger.error( 'Problem obtaining directories for transformation %s with result "%s"' % (transID, res)) return S_OK() directories = res['Value'] for directory in directories: if not re.search('/LOG/', directory): res = self.cleanCatalogContents(directory) if not res['OK']: return res res = 
self.cleanStorageContents(directory) if not res['OK']: return res gLogger.info( "Removed directories in the catalog and storage for transformation" ) # Clean ALL the possible remnants found in the metadata catalog res = self.cleanMetadataCatalogFiles(transID, directories) if not res['OK']: return res gLogger.info("Successfully removed output of transformation %d" % transID) # Change the status of the transformation to RemovedFiles res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles') if not res['OK']: gLogger.error( "Failed to update status of transformation %s to RemovedFiles" % (transID), res['Message']) return res gLogger.info("Updated status of transformation %s to RemovedFiles" % (transID)) return S_OK() def archiveTransformation(self, transID): """ This just removes job from the jobDB and the transformation DB """ gLogger.info("Archiving transformation %s" % transID) # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks(transID) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation(transID) if not res['OK']: return res gLogger.info("Successfully archived transformation %d" % transID) # Change the status of the transformation to archived res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived') if not res['OK']: gLogger.error( "Failed to update status of transformation %s to Archived" % (transID), res['Message']) return res gLogger.info("Updated status of transformation %s to Archived" % (transID)) return S_OK() def cleanTransformation(self, transID): """ This removes any mention of the supplied transformation """ gLogger.info("Cleaning transformation %s" % transID) res = self.getTransformationDirectories(transID) if not res['OK']: gLogger.error( 'Problem obtaining directories for transformation %s with result "%s"' % (transID, res)) return S_OK() directories = res['Value'] # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks(transID) if not res['OK']: return res # Clean the log files for the jobs for directory in directories: if re.search('/LOG/', directory): res = self.cleanTransformationLogFiles(directory) if not res['OK']: return res res = self.cleanCatalogContents(directory) if not res['OK']: return res res = self.cleanStorageContents(directory) if not res['OK']: return res # Clean ALL the possible remnants found in the BK res = self.cleanMetadataCatalogFiles(transID, directories) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation(transID) if not res['OK']: return res gLogger.info("Successfully cleaned transformation %d" % transID) # Change the status of the transformation to deleted res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted') if not res['OK']: gLogger.error( "Failed to update status of transformation %s to Deleted" % (transID), res['Message']) return res gLogger.info("Updated status of transformation %s to Deleted" % (transID)) return S_OK() def cleanMetadataCatalogFiles(self, transID, directories): res = self.metadataClient.findFilesByMetadata( {self.transfidmeta: transID}) if not res['OK']: return res fileToRemove = res['Value'] if not len(fileToRemove): gLogger.info('No files found for transID %s' % transID) return S_OK() res = self.replicaManager.removeFile(fileToRemove) if not res['OK']: return res for lfn, reason in 
res['Value']['Failed'].items(): gLogger.error("Failed to remove file found in metadata catalog", "%s %s" % (lfn, reason)) if res['Value']['Failed']: return S_ERROR( "Failed to remove all files found in the metadata catalog") gLogger.info("Successfully removed all files found in the BK") return S_OK() ############################################################################# # # These are the methods for removing the jobs from the WMS and transformation DB # def cleanTransformationTasks(self, transID): res = self.__getTransformationExternalIDs(transID) if not res['OK']: return res externalIDs = res['Value'] if externalIDs: res = self.transClient.getTransformationParameters( transID, ['Type']) if not res['OK']: gLogger.error("Failed to determine transformation type") return res transType = res['Value'] if transType == 'Replication': res = self.__removeRequests(externalIDs) else: res = self.__removeWMSTasks(externalIDs) if not res['OK']: return res return S_OK() def __getTransformationExternalIDs(self, transID): res = self.transClient.getTransformationTasks( condDict={'TransformationID': transID}) if not res['OK']: gLogger.error( "Failed to get externalIDs for transformation %d" % transID, res['Message']) return res externalIDs = [] for taskDict in res['Value']: externalIDs.append(taskDict['ExternalID']) gLogger.info("Found %d tasks for transformation" % len(externalIDs)) return S_OK(externalIDs) def __removeRequests(self, requestIDs): gLogger.error("Not removing requests but should do") return S_OK() def __removeWMSTasks(self, jobIDs): allRemove = True for jobList in breakListIntoChunks(jobIDs, 500): res = self.wmsClient.deleteJob(jobList) if res['OK']: gLogger.info("Successfully removed %d jobs from WMS" % len(jobList)) elif (res.has_key('InvalidJobIDs')) and ( not res.has_key('NonauthorizedJobIDs')) and ( not res.has_key('FailedJobIDs')): gLogger.info("Found %s jobs which did not exist in the WMS" % len(res['InvalidJobIDs'])) elif res.has_key('NonauthorizedJobIDs'): gLogger.error( "Failed to remove %s jobs because not authorized" % len(res['NonauthorizedJobIDs'])) allRemove = False elif res.has_key('FailedJobIDs'): gLogger.error("Failed to remove %s jobs" % len(res['FailedJobIDs'])) allRemove = False if not allRemove: return S_ERROR("Failed to remove all remnants from WMS") gLogger.info("Successfully removed all tasks from the WMS") res = self.requestClient.getRequestForJobs(jobIDs) if not res['OK']: gLogger.error("Failed to get requestID for jobs.", res['Message']) return res failoverRequests = res['Value'] gLogger.info("Found %d jobs with associated failover requests" % len(failoverRequests)) if not failoverRequests: return S_OK() failed = 0 for jobID, requestName in failoverRequests.items(): res = self.requestClient.deleteRequest(requestName) if not res['OK']: gLogger.error("Failed to remove request from RequestDB", res['Message']) failed += 1 else: gLogger.verbose("Removed request %s associated to job %d." % (requestName, jobID)) if failed: gLogger.info("Successfully removed %s requests" % (len(failoverRequests) - failed)) gLogger.info("Failed to remove %s requests" % failed) return S_ERROR("Failed to remove all the request from RequestDB") gLogger.info( "Successfully removed all the associated failover requests") return S_OK()
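#############################################################################
# Both __removeWMSTasks implementations above submit job removals in chunks
# of 500 so a single WMS call never carries an unbounded ID list. A minimal,
# hypothetical sketch of that chunking pattern in isolation; it assumes the
# usual DIRAC utility and client locations and a valid proxy, and the job ID
# list is supplied by the caller.
def _exampleDeleteJobsInChunks( transJobIDs, chunkSize = 500 ):
  """Delete WMS jobs in fixed-size chunks, reporting whether all chunks succeeded."""
  from DIRAC import gLogger
  from DIRAC.Core.Utilities.List import breakListIntoChunks
  from DIRAC.WorkloadManagementSystem.Client.WMSClient import WMSClient
  wmsClient = WMSClient()
  # Drop the 0 place-holders used for tasks that never got a WMS job
  jobIDs = [ int( j ) for j in transJobIDs if int( j ) ]
  allRemoved = True
  for jobList in breakListIntoChunks( jobIDs, chunkSize ):
    res = wmsClient.deleteJob( jobList )
    if res['OK']:
      gLogger.info( "Removed %d jobs from the WMS" % len( jobList ) )
    else:
      gLogger.error( "Failed to remove a chunk of %d jobs" % len( jobList ), res['Message'] )
      allRemoved = False
  return allRemoved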
class TransformationAgent( AgentModule ): def initialize( self ): """ standard init """ self.pluginLocation = self.am_getOption( 'PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin' ) self.checkCatalog = self.am_getOption( 'CheckCatalog', 'yes' ) self.transformationStatus = self.am_getOption( 'transformationStatus', ['Active', 'Completing', 'Flush'] ) self.maxFiles = self.am_getOption( 'MaxFiles', 5000 ) self.am_setOption( 'shifterProxy', 'ProductionManager' ) self.transDB = TransformationClient( 'TransformationDB' ) self.rm = ReplicaManager() self.unusedFiles = {} return S_OK() def execute( self ): """ get and process the transformations to be processed """ res = self.getTransformations() if not res['OK']: gLogger.info( "execute: Failed to obtain transformations: %s" % res['Message'] ) return S_OK() # Process the transformations for transDict in res['Value']: transID = long( transDict['TransformationID'] ) gLogger.info( "execute: Processing transformation %s." % transID ) startTime = time.time() res = self.processTransformation( transDict ) if not res['OK']: gLogger.info( "execute: Failed to process transformation: %s" % res['Message'] ) else: gLogger.info( "execute: Processed transformation in %.1f seconds" % ( time.time() - startTime ) ) return S_OK() def getTransformations( self ): """ Obtain the transformations to be executed """ transName = self.am_getOption( 'Transformation', 'All' ) if transName == 'All': gLogger.info( "getTransformations: Initializing general purpose agent." ) res = self.transDB.getTransformations( {'Status':self.transformationStatus}, extraParams = True ) if not res['OK']: gLogger.error( "getTransformations: Failed to get transformations: %s" % res['Message'] ) return res transformations = res['Value'] gLogger.info( "getTransformations: Obtained %d transformations to process" % len( transformations ) ) else: gLogger.info( "getTransformations: Initializing for transformation %s." % transName ) res = self.transDB.getTransformation( transName, extraParams = True ) if not res['OK']: gLogger.error( "getTransformations: Failed to get transformation: %s." % res['Message'] ) return res transformations = [res['Value']] return S_OK( transformations ) def processTransformation( self, transDict ): transID = transDict['TransformationID'] # First get the LFNs associated to the transformation res = self.transDB.getTransformationFiles( condDict = {'TransformationID':transID, 'Status':'Unused'} ) if not res['OK']: gLogger.error( "processTransformation: Failed to obtain input data: %s." % res['Message'] ) return res transFiles = res['Value'] lfns = res['LFNs'] if not lfns: gLogger.info( "processTransformation: No 'Unused' files found for transformation." ) if transDict['Status'] == 'Flush': res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' ) if not res['OK']: gLogger.error( "processTransformation: Failed to update transformation status to 'Active': %s." % res['Message'] ) else: gLogger.info( "processTransformation: Updated transformation status to 'Active'." ) return S_OK() #Check if something new happened if len( lfns ) == self.unusedFiles.get( transID, 0 ) and transDict['Status'] != 'Flush': gLogger.info( "processTransformation: No new 'Unused' files found for transformation." 
) return S_OK() replicateOrRemove = transDict['Type'].lower() in ["replication", "removal"] # Limit the number of LFNs to be considered for replication or removal as they are treated individually if replicateOrRemove: lfns = lfns[0:self.maxFiles - 1] unusedFiles = len( lfns ) # Check the data is available with replicas res = self.__getDataReplicas( transID, lfns, active = not replicateOrRemove ) if not res['OK']: gLogger.error( "processTransformation: Failed to get data replicas: %s" % res['Message'] ) return res dataReplicas = res['Value'] # Get the plug-in type and create the plug-in object plugin = 'Standard' if transDict.has_key( 'Plugin' ) and transDict['Plugin']: plugin = transDict['Plugin'] gLogger.info( "processTransformation: Processing transformation with '%s' plug-in." % plugin ) res = self.__generatePluginObject( plugin ) if not res['OK']: return res oPlugin = res['Value'] # Get the plug-in and set the required params oPlugin.setParameters( transDict ) oPlugin.setInputData( dataReplicas ) oPlugin.setTransformationFiles( transFiles ) res = oPlugin.generateTasks() if not res['OK']: gLogger.error( "processTransformation: Failed to generate tasks for transformation: %s" % res['Message'] ) return res tasks = res['Value'] # Create the tasks allCreated = True created = 0 for se, lfns in tasks: res = self.transDB.addTaskForTransformation( transID, lfns, se ) if not res['OK']: gLogger.error( "processTransformation: Failed to add task generated by plug-in: %s." % res['Message'] ) allCreated = False else: created += 1 unusedFiles -= len( lfns ) if created: gLogger.info( "processTransformation: Successfully created %d tasks for transformation." % created ) self.unusedFiles[transID] = unusedFiles # If this production is to Flush if transDict['Status'] == 'Flush' and allCreated: res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' ) if not res['OK']: gLogger.error( "processTransformation: Failed to update transformation status to 'Active': %s." % res['Message'] ) else: gLogger.info( "processTransformation: Updated transformation status to 'Active'." ) return S_OK() ###################################################################### # # Internal methods used by the agent # def __generatePluginObject( self, plugin ): """ This simply instantiates the TransformationPlugin class with the relevant plugin name """ try: plugModule = __import__( self.pluginLocation, globals(), locals(), ['TransformationPlugin'] ) except ImportError, e: gLogger.exception( "__generatePluginObject: Failed to import 'TransformationPlugin' %s: %s" % ( plugin, e ) ) return S_ERROR() try: plugin_o = getattr( plugModule, 'TransformationPlugin' )( '%s' % plugin, transClient = self.transDB, replicaManager = self.rm ) return S_OK( plugin_o ) except AttributeError, e: gLogger.exception( "__generatePluginObject: Failed to create %s(): %s." % ( plugin, e ) ) return S_ERROR()
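#############################################################################
# TransformationAgent.__generatePluginObject above only requires the plugin
# object to expose setParameters, setInputData, setTransformationFiles and
# generateTasks, the latter returning S_OK with a list of ( targetSE, lfns )
# tuples. A toy, hypothetical plugin illustrating that contract; it is not
# the real TransformationPlugin class and 'Example-SE' is a made-up storage
# element. It simply groups every GroupSize available LFNs into one task.
from DIRAC import S_OK

class ToyGroupingPlugin( object ):
  """Minimal stand-in honouring the interface used by TransformationAgent."""

  def __init__( self, plugin, transClient = None, replicaManager = None ):
    self.plugin = plugin
    self.params = {}
    self.data = {}
    self.files = []

  def setParameters( self, params ):
    """params is the transformation dictionary (GroupSize, Plugin, ...)."""
    self.params = params

  def setInputData( self, data ):
    """data maps each LFN to its replica information."""
    self.data = data

  def setTransformationFiles( self, files ):
    """files is the list of TransformationDB file records."""
    self.files = files

  def generateTasks( self ):
    """Group the available LFNs into complete tasks of GroupSize files each."""
    groupSize = int( self.params.get( 'GroupSize', 1 ) )
    lfns = sorted( self.data )
    tasks = []
    for i in xrange( 0, len( lfns ), groupSize ):
      chunk = lfns[i:i + groupSize]
      if len( chunk ) == groupSize:
        tasks.append( ( 'Example-SE', chunk ) )
    return S_OK( tasks )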
class TransformationCleaningAgent( AgentModule ): """ .. class:: TransformationCleaningAgent :param DataManger dm: DataManager instance :param TransfromationClient transClient: TransfromationClient instance :param FileCatalogClient metadataClient: FileCatalogClient instance """ def __init__( self, *args, **kwargs ): """ c'tor """ AgentModule.__init__( self, *args, **kwargs ) # # data manager self.dm = None # # transformation client self.transClient = None # # wms client self.wmsClient = None # # request client self.reqClient = None # # file catalog client self.metadataClient = None # # transformations types self.transformationTypes = None # # directory locations self.directoryLocations = None # # transformation metadata self.transfidmeta = None # # archive periof in days self.archiveAfter = None # # active SEs self.activeStorages = None # # transformation log SEs self.logSE = None # # enable/disable execution self.enableFlag = None def initialize( self ): """ agent initialisation reading and setting confing opts :param self: self reference """ # # shifter proxy self.am_setOption( 'shifterProxy', 'DataManager' ) # # transformations types self.dataProcTTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] ) self.dataManipTTypes = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] ) agentTSTypes = self.am_getOption( 'TransformationTypes', [] ) if agentTSTypes: self.transformationTypes = sorted( agentTSTypes ) else: self.transformationTypes = sorted( self.dataProcTTypes + self.dataManipTTypes ) self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) ) # # directory locations self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB', 'MetadataCatalog' ] ) ) self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) ) # # transformation metadata self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" ) self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta ) # # archive periof in days self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 ) # days self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter ) # # active SEs self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) ) self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) ) # # transformation log SEs self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' ) self.log.info( "Will remove logs found on storage element: %s" % self.logSE ) # # enable/disable execution, should be using CS option Status?? with default value as 'Active'?? 
self.enableFlag = self.am_getOption( 'EnableFlag', 'True' ) # # data manager # self.dm = DataManager() # # transformation client self.transClient = TransformationClient() # # wms client self.wmsClient = WMSClient() # # request client self.reqClient = ReqClient() # # file catalog client self.metadataClient = FileCatalogClient() return S_OK() ############################################################################# def execute( self ): """ execution in one agent's cycle :param self: self reference """ self.enableFlag = self.am_getOption( 'EnableFlag', 'True' ) if not self.enableFlag == 'True': self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' ) return S_OK( 'Disabled via CS flag' ) # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files res = self.transClient.getTransformations( { 'Status' : 'Cleaning', 'Type' : self.transformationTypes } ) if res['OK']: for transDict in res['Value']: # # if transformation is of type `Replication` or `Removal`, there is nothing to clean. # # We just archive if transDict[ 'Type' ] in self.dataManipTTypes: res = self.archiveTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) else: res = self.cleanTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles', 'Type' : self.transformationTypes} ) if res['OK']: for transDict in res['Value']: res = self.removeTransformationOutput( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) # # Obtain the transformations in Completed status and archive if inactive for X days olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter ) res = self.transClient.getTransformations( { 'Status' : 'Completed', 'Type' : self.transformationTypes }, older = olderThanTime, timeStamp = 'LastUpdate' ) if res['OK']: for transDict in res['Value']: res = self.archiveTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) else: self.log.error( "Could not get the transformations" ) return S_OK() ############################################################################# # # Get the transformation directories for checking # def getTransformationDirectories( self, transID ): """ get the directories for the supplied transformation from the transformation system :param self: self reference :param int transID: transformation ID """ directories = [] if 'TransformationDB' in self.directoryLocations: res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] ) if not res['OK']: self.log.error( "Failed to obtain transformation directories", res['Message'] ) return res if type( res['Value'] ) != type( [] ): transDirectories = ast.literal_eval( res['Value'] ) else: transDirectories = res['Value'] directories = self._addDirs( transID, transDirectories, directories ) if 'MetadataCatalog' in self.directoryLocations: res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} ) if not 
res['OK']: self.log.error( "Failed to obtain metadata catalog directories", res['Message'] ) return res transDirectories = res['Value'] directories = self._addDirs( transID, transDirectories, directories ) if not directories: self.log.info( "No output directories found" ) directories = sorted( directories ) return S_OK( directories ) # FIXME If a classmethod, should it not have cls instead of self? @classmethod def _addDirs( self, transID, newDirs, existingDirs ): """ append uniqe :newDirs: list to :existingDirs: list :param self: self reference :param int transID: transformationID :param list newDirs: src list of paths :param list existingDirs: dest list of paths """ for folder in newDirs: transStr = str( transID ).zfill( 8 ) if re.search( transStr, str( folder ) ): if not folder in existingDirs: existingDirs.append( folder ) return existingDirs ############################################################################# # # These are the methods for performing the cleaning of catalogs and storage # def cleanStorageContents( self, directory ): """ delete lfn dir from all active SE :param self: self reference :param sre directory: folder name """ for storageElement in self.activeStorages: res = self.__removeStorageDirectory( directory, storageElement ) if not res['OK']: return res return S_OK() def __removeStorageDirectory( self, directory, storageElement ): """ wipe out all contents from :directory: at :storageElement: :param self: self reference :param str directory: path :param str storageElement: SE name """ self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) ) se = StorageElement( storageElement ) res = se.getPfnForLfn( [directory] ) if not res['OK']: self.log.error( "Failed to get PFN for directory", res['Message'] ) return res if directory in res['Value']['Failed']: self.log.verbose( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, res['Value']['Failed'][directory] ) ) return S_ERROR( 'Failed to obtain directory PFN from LFNs' ) storageDirectory = res['Value']['Successful'][directory] res = returnSingleResult( se.exists( storageDirectory ) ) if not res['OK']: self.log.error( "Failed to obtain existance of directory", res['Message'] ) return res exists = res['Value'] if not exists: self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) ) return S_OK() res = returnSingleResult( se.removeDirectory( storageDirectory, recursive = True ) ) if not res['OK']: self.log.error( "Failed to remove storage directory", res['Message'] ) return res self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'], directory, storageElement ) ) return S_OK() def cleanCatalogContents( self, directory ): """ wipe out everything from catalog under folder :directory: :param self: self reference :params str directory: folder name """ res = self.__getCatalogDirectoryContents( [directory] ) if not res['OK']: return res filesFound = res['Value'] if not filesFound: self.log.info( "No files are registered in the catalog directory %s" % directory ) return S_OK() self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) ) # Executing with shifter proxy gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' ) res = DataManager().removeFile( filesFound, force = True ) gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' ) if not res['OK']: return res realFailure = False for lfn, reason in 
res['Value']['Failed'].items(): if "File does not exist" in str( reason ): self.log.warn( "File %s not found in some catalog: " % ( lfn ) ) else: self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) ) realFailure = True if realFailure: return S_ERROR( "Failed to remove all files found in the catalog" ) return S_OK() def __getCatalogDirectoryContents( self, directories ): """ get catalog contents under paths :directories: :param self: self reference :param list directories: list of paths in catalog """ self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) ) for directory in directories: self.log.info( directory ) activeDirs = directories allFiles = {} fc = FileCatalog() while len( activeDirs ) > 0: currentDir = activeDirs[0] res = returnSingleResult( fc.listDirectory( currentDir ) ) activeDirs.remove( currentDir ) if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ): self.log.info( "The supplied directory %s does not exist" % currentDir ) elif not res['OK']: if "No such file or directory" in res['Message']: self.log.info( "%s: %s" % ( currentDir, res['Message'] ) ) else: self.log.error( "Failed to get directory %s content: %s" % ( currentDir, res['Message'] ) ) else: dirContents = res['Value'] activeDirs.extend( dirContents['SubDirs'] ) allFiles.update( dirContents['Files'] ) self.log.info( "Found %d files" % len( allFiles ) ) return S_OK( allFiles.keys() ) def cleanTransformationLogFiles( self, directory ): """ clean up transformation logs from directory :directory: :param self: self reference :param str directory: folder name """ self.log.info( "Removing log files found in the directory %s" % directory ) res = returnSingleResult( StorageElement( self.logSE ).removeDirectory( directory ) ) if not res['OK']: self.log.error( "Failed to remove log files", res['Message'] ) return res self.log.info( "Successfully removed transformation log directory" ) return S_OK() ############################################################################# # # These are the functional methods for archiving and cleaning transformations # def removeTransformationOutput( self, transID ): """ This just removes any mention of the output data from the catalog and storage """ self.log.info( "Removing output data for transformation %s" % transID ) res = self.getTransformationDirectories( transID ) if not res['OK']: self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) ) return S_OK() directories = res['Value'] for directory in directories: if not re.search( '/LOG/', directory ): res = self.cleanCatalogContents( directory ) if not res['OK']: return res res = self.cleanStorageContents( directory ) if not res['OK']: return res self.log.info( "Removed directories in the catalog and storage for transformation" ) # Clean ALL the possible remnants found in the metadata catalog res = self.cleanMetadataCatalogFiles( transID ) if not res['OK']: return res self.log.info( "Successfully removed output of transformation %d" % transID ) # Change the status of the transformation to RemovedFiles res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) ) return S_OK() def archiveTransformation( self, transID ): """ This just removes job 
from the jobDB and the transformation DB :param self: self reference :param int transID: transformation ID """ self.log.info( "Archiving transformation %s" % transID ) # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks( transID ) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation( transID ) if not res['OK']: return res self.log.info( "Successfully archived transformation %d" % transID ) # Change the status of the transformation to archived res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of transformation %s to Archived" % ( transID ) ) return S_OK() def cleanTransformation( self, transID ): """ This removes what was produced by the supplied transformation, leaving only some info and log in the transformation DB. """ self.log.info( "Cleaning transformation %s" % transID ) res = self.getTransformationDirectories( transID ) if not res['OK']: self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) ) return S_OK() directories = res['Value'] # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks( transID ) if not res['OK']: return res # Clean the log files for the jobs for directory in directories: if re.search( '/LOG/', directory ): res = self.cleanTransformationLogFiles( directory ) if not res['OK']: return res res = self.cleanCatalogContents( directory ) if not res['OK']: return res res = self.cleanStorageContents( directory ) if not res['OK']: return res # Clean ALL the possible remnants found in the BK res = self.cleanMetadataCatalogFiles( transID ) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation( transID ) if not res['OK']: return res self.log.info( "Successfully cleaned transformation %d" % transID ) res = self.transClient.setTransformationParameter( transID, 'Status', 'Cleaned' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to Cleaned" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of transformation %s to Cleaned" % ( transID ) ) return S_OK() def cleanMetadataCatalogFiles( self, transID ): """ wipe out files from catalog """ res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } ) if not res['OK']: return res fileToRemove = res['Value'] if not fileToRemove: self.log.info( 'No files found for transID %s' % transID ) return S_OK() # Executing with shifter proxy gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' ) res = DataManager().removeFile( fileToRemove, force = True ) gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' ) if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) ) if res['Value']['Failed']: return S_ERROR( "Failed to remove all files found in the metadata catalog" ) self.log.info( "Successfully removed all files found in the BK" ) return S_OK() ############################################################################# # # These are the methods for removing the jobs from the WMS and transformation DB # def 
cleanTransformationTasks( self, transID ): """ clean tasks from WMS, or from the RMS if it is a DataManipulation transformation """ res = self.__getTransformationExternalIDs( transID ) if not res['OK']: return res externalIDs = res['Value'] if externalIDs: res = self.transClient.getTransformationParameters( transID, ['Type'] ) if not res['OK']: self.log.error( "Failed to determine transformation type" ) return res transType = res['Value'] if transType in self.dataProcTTypes: res = self.__removeWMSTasks( externalIDs ) else: res = self.__removeRequests( externalIDs ) if not res['OK']: return res return S_OK() def __getTransformationExternalIDs( self, transID ): """ collect all ExternalIDs for transformation :transID: :param self: self reference :param int transID: transforamtion ID """ res = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } ) if not res['OK']: self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] ) return res externalIDs = [ taskDict['ExternalID'] for taskDict in res["Value"] ] self.log.info( "Found %d tasks for transformation" % len( externalIDs ) ) return S_OK( externalIDs ) def __removeRequests( self, requestIDs ): """ This will remove requests from the (new) RMS system - #FIXME: if the old system is still installed, it won't remove anything!!! (we don't want to risk removing from the new RMS what is instead in the old) """ # FIXME: checking if the old system is still installed! from DIRAC.ConfigurationSystem.Client import PathFinder if PathFinder.getServiceURL( "RequestManagement/RequestManager" ): self.log.warn( "NOT removing requests!!" ) return S_OK() rIDs = [ int( long( j ) ) for j in requestIDs if long( j ) ] for requestName in rIDs: self.reqClient.deleteRequest( requestName ) return S_OK() def __removeWMSTasks( self, transJobIDs ): """ wipe out jobs and their requests from the system TODO: should check request status, maybe FTS files as well ??? 
:param self: self reference :param list trasnJobIDs: job IDs """ # Prevent 0 job IDs jobIDs = [ int( j ) for j in transJobIDs if int( j ) ] allRemove = True for jobList in breakListIntoChunks( jobIDs, 500 ): res = self.wmsClient.killJob( jobList ) if res['OK']: self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) ) elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ): self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) ) elif "NonauthorizedJobIDs" in res: self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) ) allRemove = False elif "FailedJobIDs" in res: self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) ) allRemove = False res = self.wmsClient.deleteJob( jobList ) if res['OK']: self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) ) elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ): self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) ) elif "NonauthorizedJobIDs" in res: self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) ) allRemove = False elif "FailedJobIDs" in res: self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) ) allRemove = False if not allRemove: return S_ERROR( "Failed to remove all remnants from WMS" ) self.log.info( "Successfully removed all tasks from the WMS" ) if not jobIDs: self.log.info( "JobIDs not present, unable to remove asociated requests." ) return S_OK() failed = 0 # FIXME: double request client: old/new -> only the new will survive sooner or later # this is the old try: res = RequestClient().getRequestForJobs( jobIDs ) if not res['OK']: self.log.error( "Failed to get requestID for jobs.", res['Message'] ) return res failoverRequests = res['Value'] self.log.info( "Found %d jobs with associated failover requests (in the old RMS)" % len( failoverRequests ) ) if not failoverRequests: return S_OK() for jobID, requestName in failoverRequests.items(): # Put this check just in case, tasks must have associated jobs if jobID == 0 or jobID == '0': continue res = RequestClient().deleteRequest( requestName ) if not res['OK']: self.log.error( "Failed to remove request from RequestDB", res['Message'] ) failed += 1 else: self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) ) except RuntimeError: failoverRequests = {} pass # FIXME: and this is the new res = self.reqClient.getRequestNamesForJobs( jobIDs ) if not res['OK']: self.log.error( "Failed to get requestID for jobs.", res['Message'] ) return res failoverRequests.update( res['Value']['Successful'] ) if not failoverRequests: return S_OK() for jobID, requestName in res['Value']['Successful'].items(): # Put this check just in case, tasks must have associated jobs if jobID == 0 or jobID == '0': continue res = self.reqClient.deleteRequest( requestName ) if not res['OK']: self.log.error( "Failed to remove request from RequestDB", res['Message'] ) failed += 1 else: self.log.verbose( "Removed request %s associated to job %d." 
% ( requestName, jobID ) ) if failed: self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) ) self.log.info( "Failed to remove %s requests" % failed ) return S_ERROR( "Failed to remove all the requests from RequestDB" ) self.log.info( "Successfully removed all the associated failover requests" ) return S_OK()
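# --- Illustrative sketch (not part of the agent) ---------------------------------
# __removeWMSTasks above processes job IDs in chunks of 500 via breakListIntoChunks.
# The helper below is a plain-Python sketch of that chunking pattern, not the DIRAC
# utility itself.
def break_list_into_chunks(items, chunk_size):
    """Yield successive slices of `items` with at most `chunk_size` elements each."""
    for start in range(0, len(items), chunk_size):
        yield items[start:start + chunk_size]

# Example: walk over 1250 hypothetical job IDs 500 at a time, as the agent does
# before calling killJob/deleteJob on each chunk.
job_ids = list(range(1, 1251))
for chunk in break_list_into_chunks(job_ids, 500):
    pass  # e.g. self.wmsClient.killJob(chunk) in the agent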
class Transformation(API): ############################################################################# def __init__(self, transID=0, transClient=None): """c'tor""" super(Transformation, self).__init__() self.paramTypes = { "TransformationID": six.integer_types, "TransformationName": six.string_types, "Status": six.string_types, "Description": six.string_types, "LongDescription": six.string_types, "Type": six.string_types, "Plugin": six.string_types, "AgentType": six.string_types, "FileMask": six.string_types, "TransformationGroup": six.string_types, "GroupSize": six.integer_types + (float, ), "InheritedFrom": six.integer_types, "Body": six.string_types, "MaxNumberOfTasks": six.integer_types, "EventsPerTask": six.integer_types, } self.paramValues = { "TransformationID": 0, "TransformationName": "", "Status": "New", "Description": "", "LongDescription": "", "Type": "", "Plugin": "Standard", "AgentType": "Manual", "FileMask": "", "TransformationGroup": "General", "GroupSize": 1, "InheritedFrom": 0, "Body": "", "MaxNumberOfTasks": 0, "EventsPerTask": 0, } # the metaquery parameters are neither part of the transformation parameters nor the additional parameters, so # special treatment is necessary self.inputMetaQuery = None self.outputMetaQuery = None self.ops = Operations() self.supportedPlugins = self.ops.getValue( "Transformations/AllowedPlugins", ["Broadcast", "Standard", "BySize", "ByShare"]) if not transClient: self.transClient = TransformationClient() else: self.transClient = transClient self.serverURL = self.transClient.getServer() self.exists = False if transID: self.paramValues["TransformationID"] = transID res = self.getTransformation() if res["OK"]: self.exists = True elif res["Message"] == "Transformation does not exist": raise AttributeError("TransformationID %d does not exist" % transID) else: self.paramValues["TransformationID"] = 0 gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % (transID, self.transClient.serverURL)) def getServer(self): return self.serverURL def reset(self, transID=0): self.__init__(transID) self.transClient.setServer(self.serverURL) return S_OK() def setTargetSE(self, seList): return self.__setSE("TargetSE", seList) def setSourceSE(self, seList): return self.__setSE("SourceSE", seList) def setBody(self, body): """check that the body is a string, or using the proper syntax for multiple operations, or is a BodyPlugin object :param body: transformation body, for example .. 
code :: python body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }), ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ), ] :type body: string or list of tuples (or lists) of string and dictionaries or a Body plugin (:py:class:`DIRAC.TransformationSystem.Client.BodyPlugin.BaseBody.BaseBody`) :raises TypeError: If the structure is not as expected :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation` is used :returns: S_OK, S_ERROR """ self.item_called = "Body" # Simple single operation body case if isinstance(body, six.string_types): return self.__setParam(body) # BodyPlugin case elif isinstance(body, BaseBody): return self.__setParam(encode(body)) if not isinstance(body, (list, tuple)): raise TypeError("Expected list or string, but %r is %s" % (body, type(body))) # MultiOperation body case for tup in body: if not isinstance(tup, (tuple, list)): raise TypeError("Expected tuple or list, but %r is %s" % (tup, type(tup))) if len(tup) != 2: raise TypeError("Expected 2-tuple, but %r is length %d" % (tup, len(tup))) if not isinstance(tup[0], six.string_types): raise TypeError( "Expected string, but first entry in tuple %r is %s" % (tup, type(tup[0]))) if not isinstance(tup[1], dict): raise TypeError( "Expected dictionary, but second entry in tuple %r is %s" % (tup, type(tup[0]))) for par, val in tup[1].items(): if not isinstance(par, six.string_types): raise TypeError( "Expected string, but key in dictionary %r is %s" % (par, type(par))) if par not in Operation.ATTRIBUTE_NAMES: raise ValueError("Unknown attribute for Operation: %s" % par) if not isinstance( val, six.string_types + six.integer_types + (float, list, tuple, dict)): raise TypeError("Cannot encode %r, in json" % (val)) return self.__setParam(json.dumps(body)) def setInputMetaQuery(self, query): """Set the input meta query. :param dict query: dictionary to use for input meta query """ self.inputMetaQuery = query return S_OK() def setOutputMetaQuery(self, query): """Set the output meta query. 
:param dict query: dictionary to use for output meta query """ self.outputMetaQuery = query return S_OK() def __setSE(self, seParam, seList): if isinstance(seList, six.string_types): try: seList = eval(seList) except Exception: seList = seList.split(",") elif isinstance(seList, (list, dict, tuple)): seList = list(seList) else: return S_ERROR("Bad argument type") res = self.__checkSEs(seList) if not res["OK"]: return res self.item_called = seParam return self.__setParam(seList) def __getattr__(self, name): if name.find("get") == 0: item = name[3:] self.item_called = item return self.__getParam if name.find("set") == 0: item = name[3:] self.item_called = item return self.__setParam raise AttributeError(name) def __getParam(self): if self.item_called == "Available": return S_OK(list(self.paramTypes)) if self.item_called == "Parameters": return S_OK(self.paramValues) if self.item_called in self.paramValues: return S_OK(self.paramValues[self.item_called]) raise AttributeError("Unknown parameter for transformation: %s" % self.item_called) def __setParam(self, value): change = False if self.item_called in self.paramTypes: if self.paramValues[self.item_called] != value: if isinstance(value, self.paramTypes[self.item_called]): change = True else: raise TypeError("%s %s %s expected one of %s" % (self.item_called, value, type(value), self.paramTypes[self.item_called])) else: if self.item_called not in self.paramValues: change = True else: if self.paramValues[self.item_called] != value: change = True if not change: gLogger.verbose("No change of parameter %s required" % self.item_called) else: gLogger.verbose("Parameter %s to be changed" % self.item_called) transID = self.paramValues["TransformationID"] if self.exists and transID: res = self.transClient.setTransformationParameter( transID, self.item_called, value) if not res["OK"]: return res self.paramValues[self.item_called] = value return S_OK() def getTransformation(self, printOutput=False): transID = self.paramValues["TransformationID"] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() res = self.transClient.getTransformation(transID, extraParams=True) if not res["OK"]: if printOutput: self._prettyPrint(res) return res transParams = res["Value"] for paramName, paramValue in transParams.items(): setter = None setterName = "set%s" % paramName if hasattr(self, setterName) and callable(getattr( self, setterName)): setter = getattr(self, setterName) if not setterName: gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName) continue setter(paramValue) if printOutput: gLogger.info("No printing available yet") return S_OK(transParams) def getTransformationLogging(self, printOutput=False): transID = self.paramValues["TransformationID"] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() res = self.transClient.getTransformationLogging(transID) if not res["OK"]: if printOutput: self._prettyPrint(res) return res loggingList = res["Value"] if printOutput: self._printFormattedDictList( loggingList, ["Message", "MessageDate", "AuthorDN"], "MessageDate", "MessageDate") return S_OK(loggingList) def extendTransformation(self, nTasks, printOutput=False): return self.__executeOperation("extendTransformation", nTasks, printOutput=printOutput) def cleanTransformation(self, printOutput=False): res = self.__executeOperation("cleanTransformation", printOutput=printOutput) if res["OK"]: self.paramValues["Status"] = "Cleaned" return res def deleteTransformation(self, printOutput=False): res = 
self.__executeOperation("deleteTransformation", printOutput=printOutput) if res["OK"]: self.reset() return res def addFilesToTransformation(self, lfns, printOutput=False): return self.__executeOperation("addFilesToTransformation", lfns, printOutput=printOutput) def setFileStatusForTransformation(self, status, lfns, printOutput=False): return self.__executeOperation("setFileStatusForTransformation", status, lfns, printOutput=printOutput) def getTransformationTaskStats(self, printOutput=False): return self.__executeOperation("getTransformationTaskStats", printOutput=printOutput) def getTransformationStats(self, printOutput=False): return self.__executeOperation("getTransformationStats", printOutput=printOutput) def deleteTasks(self, taskMin, taskMax, printOutput=False): return self.__executeOperation("deleteTasks", taskMin, taskMax, printOutput=printOutput) def addTaskForTransformation(self, lfns=[], se="Unknown", printOutput=False): return self.__executeOperation("addTaskForTransformation", lfns, se, printOutput=printOutput) def setTaskStatus(self, taskID, status, printOutput=False): return self.__executeOperation("setTaskStatus", taskID, status, printOutput=printOutput) def __executeOperation(self, operation, *parms, **kwds): transID = self.paramValues["TransformationID"] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() printOutput = kwds.pop("printOutput") fcn = None if hasattr(self.transClient, operation) and callable( getattr(self.transClient, operation)): fcn = getattr(self.transClient, operation) if not fcn: return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" ) res = fcn(transID, *parms, **kwds) if printOutput: self._prettyPrint(res) return res def getTransformationFiles( self, fileStatus=[], lfns=[], outputFields=[ "FileID", "LFN", "Status", "TaskID", "TargetSE", "UsedSE", "ErrorCount", "InsertedTime", "LastUpdate", ], orderBy="FileID", printOutput=False, ): condDict = {"TransformationID": self.paramValues["TransformationID"]} if fileStatus: condDict["Status"] = fileStatus if lfns: condDict["LFN"] = lfns res = self.transClient.getTransformationFiles(condDict=condDict) if not res["OK"]: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res["ParameterNames"].join(" ")) elif not res["Value"]: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res["Value"], outputFields, "FileID", orderBy) return res def getTransformationTasks( self, taskStatus=[], taskIDs=[], outputFields=[ "TransformationID", "TaskID", "ExternalStatus", "ExternalID", "TargetSE", "CreationTime", "LastUpdateTime", ], orderBy="TaskID", printOutput=False, ): condDict = {"TransformationID": self.paramValues["TransformationID"]} if taskStatus: condDict["ExternalStatus"] = taskStatus if taskIDs: condDict["TaskID"] = taskIDs res = self.transClient.getTransformationTasks(condDict=condDict) if not res["OK"]: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res["ParameterNames"].join(" ")) elif not res["Value"]: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res["Value"], outputFields, "TaskID", orderBy) return res ############################################################################# def getTransformations( self, transID=[], transStatus=[], outputFields=[ "TransformationID", "Status", "AgentType", "TransformationName", "CreationDate" ], 
orderBy="TransformationID", printOutput=False, ): condDict = {} if transID: condDict["TransformationID"] = transID if transStatus: condDict["Status"] = transStatus res = self.transClient.getTransformations(condDict=condDict) if not res["OK"]: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res["ParameterNames"].join(" ")) elif not res["Value"]: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res["Value"], outputFields, "TransformationID", orderBy) return res ############################################################################# def getAuthorDNfromProxy(self): """gets the AuthorDN and username of the transformation from the uploaded proxy""" username = "" author = "" res = getProxyInfo() if res["OK"]: author = res["Value"]["identity"] username = res["Value"]["username"] else: gLogger.error("Unable to get uploaded proxy Info %s " % res["Message"]) return S_ERROR(res["Message"]) res = {"username": username, "authorDN": author} return S_OK(res) ############################################################################# def getTransformationsByUser( self, authorDN="", userName="", transID=[], transStatus=[], outputFields=[ "TransformationID", "Status", "AgentType", "TransformationName", "CreationDate", "AuthorDN" ], orderBy="TransformationID", printOutput=False, ): condDict = {} if authorDN == "": res = self.getAuthorDNfromProxy() if not res["OK"]: gLogger.error(res["Message"]) return S_ERROR(res["Message"]) else: foundUserName = res["Value"]["username"] foundAuthor = res["Value"]["authorDN"] # If the username whom created the uploaded proxy is different than the provided username report error and exit if not (userName == "" or userName == foundUserName): gLogger.error( "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % (userName, foundUserName)) return S_ERROR( "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % (userName, foundUserName)) userName = foundUserName authorDN = foundAuthor gLogger.info( "Will list transformations created by user '%s' with status '%s'" % (userName, ", ".join(transStatus))) else: gLogger.info( "Will list transformations created by '%s' with status '%s'" % (authorDN, ", ".join(transStatus))) condDict["AuthorDN"] = authorDN if transID: condDict["TransformationID"] = transID if transStatus: condDict["Status"] = transStatus res = self.transClient.getTransformations(condDict=condDict) if not res["OK"]: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res["ParameterNames"].join(" ")) elif not res["Value"]: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res["Value"], outputFields, "TransformationID", orderBy) return res ############################################################################# def getSummaryTransformations(self, transID=[]): """Show the summary for a list of Transformations Fields starting with 'F' ('J') refers to files (jobs). Proc. stand for processed. 
""" condDict = {"TransformationID": transID} orderby = [] start = 0 maxitems = len(transID) paramShowNames = [ "TransformationID", "Type", "Status", "Files_Total", "Files_PercentProcessed", "Files_Processed", "Files_Unused", "Jobs_TotalCreated", "Jobs_Waiting", "Jobs_Running", "Jobs_Done", "Jobs_Failed", "Jobs_Stalled", ] # Below, the header used for each field in the printing: short to fit in one line paramShowNamesShort = [ "TransID", "Type", "Status", "F_Total", "F_Proc.(%)", "F_Proc.", "F_Unused", "J_Created", "J_Wait", "J_Run", "J_Done", "J_Fail", "J_Stalled", ] dictList = [] result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems) if not result["OK"]: self._prettyPrint(result) return result if result["Value"]["TotalRecords"] > 0: try: paramNames = result["Value"]["ParameterNames"] for paramValues in result["Value"]["Records"]: paramShowValues = map( lambda pname: paramValues[paramNames.index(pname)], paramShowNames) showDict = dict(zip(paramShowNamesShort, paramShowValues)) dictList.append(showDict) except Exception as x: print("Exception %s " % str(x)) if not len(dictList) > 0: gLogger.error( "No found transformations satisfying input condition") return S_ERROR( "No found transformations satisfying input condition") else: print( self._printFormattedDictList(dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0])) return S_OK(dictList) ############################################################################# def addTransformation(self, addFiles=True, printOutput=False): """Add transformation to the transformation system. Sets all parameters currently assigned to the transformation. :param bool addFiles: if True, immediately perform input data query :param bool printOutput: if True, print information about transformation """ res = self._checkCreation() if not res["OK"]: return self._errorReport(res, "Failed transformation sanity check") if printOutput: gLogger.info( "Will attempt to create transformation with the following parameters" ) self._prettyPrint(self.paramValues) res = self.transClient.addTransformation( self.paramValues["TransformationName"], self.paramValues["Description"], self.paramValues["LongDescription"], self.paramValues["Type"], self.paramValues["Plugin"], self.paramValues["AgentType"], self.paramValues["FileMask"], transformationGroup=self.paramValues["TransformationGroup"], groupSize=self.paramValues["GroupSize"], inheritedFrom=self.paramValues["InheritedFrom"], body=self.paramValues["Body"], maxTasks=self.paramValues["MaxNumberOfTasks"], eventsPerTask=self.paramValues["EventsPerTask"], addFiles=addFiles, inputMetaQuery=self.inputMetaQuery, outputMetaQuery=self.outputMetaQuery, ) if not res["OK"]: if printOutput: self._prettyPrint(res) return res transID = res["Value"] self.exists = True self.setTransformationID(transID) gLogger.notice("Created transformation %d" % transID) for paramName, paramValue in self.paramValues.items(): if paramName not in self.paramTypes: res = self.transClient.setTransformationParameter( transID, paramName, paramValue) if not res["OK"]: gLogger.error("Failed to add parameter", "%s %s" % (paramName, res["Message"])) gLogger.notice( "To add this parameter later please execute the following." 
) gLogger.notice("oTransformation = Transformation(%d)" % transID) gLogger.notice("oTransformation.set%s(...)" % paramName) return S_OK(transID) def _checkCreation(self): """Few checks""" if self.paramValues["TransformationID"]: gLogger.info( "You are currently working with an active transformation definition." ) gLogger.info( "If you wish to create a new transformation reset the TransformationID." ) gLogger.info("oTransformation.reset()") return S_ERROR() requiredParameters = [ "TransformationName", "Description", "LongDescription", "Type" ] for parameter in requiredParameters: if not self.paramValues[parameter]: gLogger.info( "%s is not defined for this transformation. This is required..." % parameter) self.paramValues[parameter] = six.moves.input( "Please enter the value of " + parameter + " ") plugin = self.paramValues["Plugin"] if plugin: if plugin not in self.supportedPlugins: gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin) res = self.__promptForParameter("Plugin", choices=self.supportedPlugins, default="Standard") if not res["OK"]: return res self.paramValues["Plugin"] = res["Value"] plugin = self.paramValues["Plugin"] return S_OK() def _checkBySizePlugin(self): return self._checkStandardPlugin() def _checkBySharePlugin(self): return self._checkStandardPlugin() def _checkStandardPlugin(self): groupSize = self.paramValues["GroupSize"] if groupSize <= 0: gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." ) res = self.setGroupSize(1) if not res["OK"]: return res return S_OK() def _checkBroadcastPlugin(self): gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % (", ".join(["SourceSE", "TargetSE"]))) requiredParams = ["SourceSE", "TargetSE"] for requiredParam in requiredParams: if not self.paramValues.get(requiredParam): paramValue = six.moves.input("Please enter " + requiredParam + " ") setter = None setterName = "set%s" % requiredParam if hasattr(self, setterName) and callable( getattr(self, setterName)): setter = getattr(self, setterName) if not setter: return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." % setterName) ses = paramValue.replace(",", " ").split() res = setter(ses) if not res["OK"]: return res return S_OK() def __checkSEs(self, seList): res = gConfig.getSections("/Resources/StorageElements") if not res["OK"]: return self._errorReport(res, "Failed to get possible StorageElements") missing = set(seList) - set(res["Value"]) if missing: for se in missing: gLogger.error("StorageElement %s is not known" % se) return S_ERROR("%d StorageElements not known" % len(missing)) return S_OK() def __promptForParameter(self, parameter, choices=[], default="", insert=True): res = promptUser("Please enter %s" % parameter, choices=choices, default=default) if not res["OK"]: return self._errorReport(res) gLogger.notice("%s will be set to '%s'" % (parameter, res["Value"])) paramValue = res["Value"] if insert: setter = None setterName = "set%s" % parameter if hasattr(self, setterName) and callable(getattr( self, setterName)): setter = getattr(self, setterName) if not setter: return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" ) res = setter(paramValue) if not res["OK"]: return res return S_OK(paramValue)
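# --- Illustrative usage sketch (not part of the class) ---------------------------
# A minimal example of driving the Transformation API defined above, assuming a
# configured DIRAC client installation and a reachable Transformation service.  The
# transformation name, type and parameter values are illustrative only.
from DIRAC.TransformationSystem.Client.Transformation import Transformation

trans = Transformation()
trans.setTransformationName("Example_Transformation_01")  # must be unique
trans.setDescription("Example transformation")
trans.setLongDescription("A longer description of the example transformation")
trans.setType("MCSimulation")
trans.setPlugin("Standard")      # one of the supported plugins
trans.setGroupSize(1)            # number of input files per task
res = trans.addTransformation()  # registers it in the TransformationDB
if res["OK"]:
    trans.setAgentType("Automatic")
    trans.setStatus("Active")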
class TransformationCleaningAgent( AgentModule ): ############################################################################# def initialize( self ): """Sets defaults """ self.replicaManager = ReplicaManager() self.transClient = TransformationClient() self.wmsClient = WMSClient() self.requestClient = RequestClient() self.metadataClient = FileCatalogClient() self.storageUsageClient = StorageUsageClient() # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption( 'shifterProxy', 'DataManager' ) self.transformationTypes = sortList( self.am_getOption( 'TransformationTypes', ['MCSimulation', 'DataReconstruction', 'DataStripping', 'MCStripping', 'Merge', 'Replication'] ) ) gLogger.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) ) self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', ['TransformationDB', 'StorageUsage', 'MetadataCatalog'] ) ) gLogger.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) ) self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" ) gLogger.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta ) self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 ) # days gLogger.info( "Will archive Completed transformations after %d days" % self.archiveAfter ) self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) ) gLogger.info( "Will check the following storage elements: %s" % str( self.activeStorages ) ) self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' ) gLogger.info( "Will remove logs found on storage element: %s" % self.logSE ) return S_OK() ############################################################################# def execute( self ): """ The TransformationCleaningAgent execution method. 
""" self.enableFlag = self.am_getOption( 'EnableFlag', 'True' ) if not self.enableFlag == 'True': self.log.info( 'TransformationCleaningAgent is disabled by configuration option %s/EnableFlag' % ( self.section ) ) return S_OK( 'Disabled via CS flag' ) # Obtain the transformations in Cleaning status and remove any mention of the jobs/files res = self.transClient.getTransformations( {'Status':'Cleaning', 'Type':self.transformationTypes} ) if res['OK']: for transDict in res['Value']: self.cleanTransformation( transDict['TransformationID'] ) # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files res = self.transClient.getTransformations( {'Status':'RemovingFiles', 'Type':self.transformationTypes} ) if res['OK']: for transDict in res['Value']: self.removeTransformationOutput( transDict['TransformationID'] ) # Obtain the transformations in Completed status and archive if inactive for X days olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter ) res = self.transClient.getTransformations( {'Status':'Completed', 'Type':self.transformationTypes}, older = olderThanTime ) if res['OK']: for transDict in res['Value']: self.archiveTransformation( transDict['TransformationID'] ) return S_OK() ############################################################################# # # Get the transformation directories for checking # def getTransformationDirectories( self, transID ): """ Get the directories for the supplied transformation from the transformation system """ directories = [] if 'TransformationDB' in self.directoryLocations: res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] ) if not res['OK']: gLogger.error( "Failed to obtain transformation directories", res['Message'] ) return res transDirectories = res['Value'].splitlines() directories = self.__addDirs( transID, transDirectories, directories ) if 'StorageUsage' in self.directoryLocations: res = self.storageUsageClient.getStorageDirectories( '', '', transID, [] ) if not res['OK']: gLogger.error( "Failed to obtain storage usage directories", res['Message'] ) return res transDirectories = res['Value'] directories = self.__addDirs( transID, transDirectories, directories ) if 'MetadataCatalog' in self.directoryLocations: res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} ) if not res['OK']: gLogger.error( "Failed to obtain metadata catalog directories", res['Message'] ) return res transDirectories = res['Value'] directories = self.__addDirs( transID, transDirectories, directories ) if not directories: gLogger.info( "No output directories found" ) directories = sortList( directories ) return S_OK( directories ) def __addDirs( self, transID, newDirs, existingDirs ): for dir in newDirs: transStr = str( transID ).zfill( 8 ) if re.search( transStr, dir ): if not dir in existingDirs: existingDirs.append( dir ) return existingDirs ############################################################################# # # These are the methods for performing the cleaning of catalogs and storage # def cleanStorageContents( self, directory ): for storageElement in self.activeStorages: res = self.__removeStorageDirectory( directory, storageElement ) if not res['OK']: return res return S_OK() def __removeStorageDirectory( self, directory, storageElement ): gLogger.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) ) res = self.replicaManager.getPfnForLfn( [directory], storageElement ) if not res['OK']: gLogger.error( "Failed to get 
PFN for directory", res['Message'] ) return res for directory, error in res['Value']['Failed'].items(): gLogger.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, error ) ) if res['Value']['Failed']: return S_ERROR( 'Failed to obtain directory PFN from LFNs' ) storageDirectory = res['Value']['Successful'].values()[0] res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True ) if not res['OK']: gLogger.error( "Failed to obtain existance of directory", res['Message'] ) return res exists = res['Value'] if not exists: gLogger.info( "The directory %s does not exist at %s " % ( directory, storageElement ) ) return S_OK() res = self.replicaManager.removeStorageDirectory( storageDirectory, storageElement, recursive = True, singleDirectory = True ) if not res['OK']: gLogger.error( "Failed to remove storage directory", res['Message'] ) return res gLogger.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'], directory, storageElement ) ) return S_OK() def cleanCatalogContents( self, directory ): res = self.__getCatalogDirectoryContents( [directory] ) if not res['OK']: return res filesFound = res['Value'] if not filesFound: return S_OK() gLogger.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) ) res = self.replicaManager.removeFile( filesFound ) if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): gLogger.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) ) if res['Value']['Failed']: return S_ERROR( "Failed to remove all files found in the catalog" ) return S_OK() def __getCatalogDirectoryContents( self, directories ): gLogger.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) ) for directory in directories: gLogger.info( directory ) activeDirs = directories allFiles = {} while len( activeDirs ) > 0: currentDir = activeDirs[0] res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True ) activeDirs.remove( currentDir ) if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ): gLogger.info( "The supplied directory %s does not exist" % currentDir ) elif not res['OK']: gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) ) else: dirContents = res['Value'] activeDirs.extend( dirContents['SubDirs'] ) allFiles.update( dirContents['Files'] ) gLogger.info( "Found %d files" % len( allFiles ) ) return S_OK( allFiles.keys() ) def cleanTransformationLogFiles( self, directory ): gLogger.info( "Removing log files found in the directory %s" % directory ) res = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True ) if not res['OK']: gLogger.error( "Failed to remove log files", res['Message'] ) return res gLogger.info( "Successfully removed transformation log directory" ) return S_OK() ############################################################################# # # These are the functional methods for archiving and cleaning transformations # def removeTransformationOutput( self, transID ): """ This just removes any mention of the output data from the catalog and storage """ gLogger.info( "Removing output data for transformation %s" % transID ) res = self.getTransformationDirectories( transID ) if not res['OK']: gLogger.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) ) return S_OK() directories = res['Value'] for directory 
in directories: if not re.search( '/LOG/', directory ): res = self.cleanCatalogContents( directory ) if not res['OK']: return res res = self.cleanStorageContents( directory ) if not res['OK']: return res gLogger.info( "Removed directories in the catalog and storage for transformation" ) # Clean ALL the possible remnants found in the metadata catalog res = self.cleanMetadataCatalogFiles( transID, directories ) if not res['OK']: return res gLogger.info( "Successfully removed output of transformation %d" % transID ) # Change the status of the transformation to RemovedFiles res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' ) if not res['OK']: gLogger.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] ) return res gLogger.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) ) return S_OK() def archiveTransformation( self, transID ): """ This just removes job from the jobDB and the transformation DB """ gLogger.info( "Archiving transformation %s" % transID ) # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks( transID ) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation( transID ) if not res['OK']: return res gLogger.info( "Successfully archived transformation %d" % transID ) # Change the status of the transformation to archived res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' ) if not res['OK']: gLogger.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] ) return res gLogger.info( "Updated status of transformation %s to Archived" % ( transID ) ) return S_OK() def cleanTransformation( self, transID ): """ This removes any mention of the supplied transformation """ gLogger.info( "Cleaning transformation %s" % transID ) res = self.getTransformationDirectories( transID ) if not res['OK']: gLogger.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) ) return S_OK() directories = res['Value'] # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks( transID ) if not res['OK']: return res # Clean the log files for the jobs for directory in directories: if re.search( '/LOG/', directory ): res = self.cleanTransformationLogFiles( directory ) if not res['OK']: return res res = self.cleanCatalogContents( directory ) if not res['OK']: return res res = self.cleanStorageContents( directory ) if not res['OK']: return res # Clean ALL the possible remnants found in the BK res = self.cleanMetadataCatalogFiles( transID, directories ) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation( transID ) if not res['OK']: return res gLogger.info( "Successfully cleaned transformation %d" % transID ) # Change the status of the transformation to deleted res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' ) if not res['OK']: gLogger.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] ) return res gLogger.info( "Updated status of transformation %s to Deleted" % ( transID ) ) return S_OK() def cleanMetadataCatalogFiles( self, transID, directories ): res = self.metadataClient.findFilesByMetadata( {self.transfidmeta:transID} ) if not res['OK']: return res fileToRemove = res['Value'] if not 
len(fileToRemove): gLogger.info('No files found for transID %s'%transID) return S_OK() res = self.replicaManager.removeFile( fileToRemove ) if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): gLogger.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) ) if res['Value']['Failed']: return S_ERROR( "Failed to remove all files found in the metadata catalog" ) gLogger.info( "Successfully removed all files found in the BK" ) return S_OK() ############################################################################# # # These are the methods for removing the jobs from the WMS and transformation DB # def cleanTransformationTasks( self, transID ): res = self.__getTransformationExternalIDs( transID ) if not res['OK']: return res externalIDs = res['Value'] if externalIDs: res = self.transClient.getTransformationParameters( transID, ['Type'] ) if not res['OK']: gLogger.error( "Failed to determine transformation type" ) return res transType = res['Value'] if transType == 'Replication': res = self.__removeRequests( externalIDs ) else: res = self.__removeWMSTasks( externalIDs ) if not res['OK']: return res return S_OK() def __getTransformationExternalIDs( self, transID ): res = self.transClient.getTransformationTasks( condDict = {'TransformationID':transID} ) if not res['OK']: gLogger.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] ) return res externalIDs = [] for taskDict in res['Value']: externalIDs.append( taskDict['ExternalID'] ) gLogger.info( "Found %d tasks for transformation" % len( externalIDs ) ) return S_OK( externalIDs ) def __removeRequests( self, requestIDs ): gLogger.error( "Not removing requests but should do" ) return S_OK() def __removeWMSTasks( self, jobIDs ): allRemove = True for jobList in breakListIntoChunks( jobIDs, 500 ): res = self.wmsClient.deleteJob( jobList ) if res['OK']: gLogger.info( "Successfully removed %d jobs from WMS" % len( jobList ) ) elif ( res.has_key( 'InvalidJobIDs' ) ) and ( not res.has_key( 'NonauthorizedJobIDs' ) ) and ( not res.has_key( 'FailedJobIDs' ) ): gLogger.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) ) elif res.has_key( 'NonauthorizedJobIDs' ): gLogger.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) ) allRemove = False elif res.has_key( 'FailedJobIDs' ): gLogger.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) ) allRemove = False if not allRemove: return S_ERROR( "Failed to remove all remnants from WMS" ) gLogger.info( "Successfully removed all tasks from the WMS" ) res = self.requestClient.getRequestForJobs( jobIDs ) if not res['OK']: gLogger.error( "Failed to get requestID for jobs.", res['Message'] ) return res failoverRequests = res['Value'] gLogger.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) ) if not failoverRequests: return S_OK() failed = 0 for jobID, requestName in failoverRequests.items(): res = self.requestClient.deleteRequest( requestName ) if not res['OK']: gLogger.error( "Failed to remove request from RequestDB", res['Message'] ) failed += 1 else: gLogger.verbose( "Removed request %s associated to job %d." 
% ( requestName, jobID ) ) if failed: gLogger.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) ) gLogger.info( "Failed to remove %s requests" % failed ) return S_ERROR( "Failed to remove all the request from RequestDB" ) gLogger.info( "Successfully removed all the associated failover requests" ) return S_OK()
selectDict = {'TransformationID': res['Value']['TransformationID']} if status: selectDict['Status'] = status res = tc.getTransformationFiles(condDict=selectDict) if not res['OK']: gLogger.error('Failed to get transformation files: %s' % res['Message']) continue if not res['Value']: gLogger.debug('No file found for transformation %s' % t) continue lfns = [f['LFN'] for f in res['Value']] gLogger.notice('Reset files for status: %s' % status) res = tc.setFileStatusForTransformation(t, 'Unused', lfns) if not res['OK']: gLogger.error('Failed to reset file status: %s' % res['Message']) continue if 'Failed' in res['Value']: gLogger.warn('Could not reset some files: ') for lfn, reason in res['Value']['Failed'].items(): gLogger.warn('%s: %s' % (lfn, reason)) gLogger.notice('Updated file statuses to "Unused" for %d file(s)' % len(lfns)) result = tc.setTransformationParameter(t, 'Status', 'Flush') if not result['OK']: gLogger.error('Can not flush transformation: %s' % result['Message']) continue
class TransformationCleaningAgent( AgentModule ): ''' .. class:: TransformationCleaningAgent :param ReplicaManger replicaManager: ReplicaManager instance :param TransfromationClient transClient: TransfromationClient instance :param RequestClient requestClient: RequestClient instance :param FileCatalogClient metadataClient: FileCatalogClient instance ''' def __init__( self, *args, **kwargs ): ''' c'tor ''' AgentModule.__init__( self, *args, **kwargs ) # # replica manager self.replicaManager = ReplicaManager() # # transformation client self.transClient = TransformationClient() # # wms client self.wmsClient = WMSClient() # # request client self.requestClient = RequestClient() # # file catalog clinet self.metadataClient = FileCatalogClient() # # placeholders for CS options # # transformations types self.transformationTypes = None # # directory locations self.directoryLocations = None # # transformation metadata self.transfidmeta = None # # archive periof in days self.archiveAfter = None # # active SEs self.activeStorages = None # # transformation log SEs self.logSE = None # # enable/disable execution self.enableFlag = None def initialize( self ): ''' agent initialisation reading and setting confing opts :param self: self reference ''' # # shifter proxy self.am_setOption( 'shifterProxy', 'DataManager' ) # # transformations types agentTSTypes = self.am_getOption( 'TransformationTypes', [] ) if agentTSTypes: self.transformationTypes = sortList( agentTSTypes ) else: dataProc = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] ) dataManip = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] ) self.transformationTypes = sortList( dataProc + dataManip ) self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) ) # # directory locations self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB', 'MetadataCatalog' ] ) ) self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) ) # # transformation metadata self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" ) self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta ) # # archive periof in days self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 ) # days self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter ) # # active SEs self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) ) self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) ) # # transformation log SEs self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' ) self.log.info( "Will remove logs found on storage element: %s" % self.logSE ) # # enable/disable execution, should be using CS option Status?? with default value as 'Active'?? 
self.enableFlag = self.am_getOption( 'EnableFlag', 'True' ) return S_OK() ############################################################################# def execute( self ): ''' execution in one agent's cycle :param self: self reference ''' self.enableFlag = self.am_getOption( 'EnableFlag', 'True' ) if not self.enableFlag == 'True': self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' ) return S_OK( 'Disabled via CS flag' ) # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files res = self.transClient.getTransformations( { 'Status' : 'Cleaning', 'Type' : self.transformationTypes } ) if res['OK']: for transDict in res['Value']: # # if transformation is of type `Replication` or `Removal`, there is nothing to clean. # # We just archive if transDict[ 'Type' ] in [ 'Replication', 'Removal' ]: res = self.archiveTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) else: res = self.cleanTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles', 'Type' : self.transformationTypes} ) if res['OK']: for transDict in res['Value']: res = self.removeTransformationOutput( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) # # Obtain the transformations in Completed status and archive if inactive for X days olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter ) res = self.transClient.getTransformations( { 'Status' : 'Completed', 'Type' : self.transformationTypes }, older = olderThanTime, timeStamp = 'LastUpdate' ) if res['OK']: for transDict in res['Value']: res = self.archiveTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) else: self.log.error( "Could not get the transformations" ) return S_OK() ############################################################################# # # Get the transformation directories for checking # def getTransformationDirectories( self, transID ): ''' get the directories for the supplied transformation from the transformation system :param self: self reference :param int transID: transformation ID ''' directories = [] if 'TransformationDB' in self.directoryLocations: res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] ) if not res['OK']: self.log.error( "Failed to obtain transformation directories", res['Message'] ) return res transDirectories = res['Value'].splitlines() directories = self._addDirs( transID, transDirectories, directories ) if 'MetadataCatalog' in self.directoryLocations: res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} ) if not res['OK']: self.log.error( "Failed to obtain metadata catalog directories", res['Message'] ) return res transDirectories = res['Value'] directories = self._addDirs( transID, transDirectories, directories ) if not directories: self.log.info( "No output directories found" ) directories = sortList( directories ) return S_OK( directories ) 
@classmethod def _addDirs( self, transID, newDirs, existingDirs ): ''' append uniqe :newDirs: list to :existingDirs: list :param self: self reference :param int transID: transformationID :param list newDirs: src list of paths :param list existingDirs: dest list of paths ''' for folder in newDirs: transStr = str( transID ).zfill( 8 ) if re.search( transStr, str( folder ) ): if not folder in existingDirs: existingDirs.append( folder ) return existingDirs ############################################################################# # # These are the methods for performing the cleaning of catalogs and storage # def cleanStorageContents( self, directory ): ''' delete lfn dir from all active SE :param self: self reference :param sre directory: folder name ''' for storageElement in self.activeStorages: res = self.__removeStorageDirectory( directory, storageElement ) if not res['OK']: return res return S_OK() def __removeStorageDirectory( self, directory, storageElement ): ''' wipe out all contents from :directory: at :storageElement: :param self: self reference :param str directory: path :param str storageElement: SE name ''' self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) ) res = self.replicaManager.getPfnForLfn( [directory], storageElement ) if not res['OK']: self.log.error( "Failed to get PFN for directory", res['Message'] ) return res for directory, error in res['Value']['Failed'].items(): self.log.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, error ) ) if res['Value']['Failed']: return S_ERROR( 'Failed to obtain directory PFN from LFNs' ) storageDirectory = res['Value']['Successful'].values()[0] res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True ) if not res['OK']: self.log.error( "Failed to obtain existance of directory", res['Message'] ) return res exists = res['Value'] if not exists: self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) ) return S_OK() res = self.replicaManager.removeStorageDirectory( storageDirectory, storageElement, recursive = True, singleDirectory = True ) if not res['OK']: self.log.error( "Failed to remove storage directory", res['Message'] ) return res self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'], directory, storageElement ) ) return S_OK() def cleanCatalogContents( self, directory ): ''' wipe out everything from catalog under folder :directory: :param self: self reference :params str directory: folder name ''' res = self.__getCatalogDirectoryContents( [directory] ) if not res['OK']: return res filesFound = res['Value'] if not filesFound: return S_OK() self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) ) res = self.replicaManager.removeFile( filesFound, force = True ) if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) ) if res['Value']['Failed']: return S_ERROR( "Failed to remove all files found in the catalog" ) return S_OK() def __getCatalogDirectoryContents( self, directories ): ''' get catalog contents under paths :directories: :param self: self reference :param list directories: list of paths in catalog ''' self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) ) for directory in directories: self.log.info( directory ) activeDirs = directories allFiles = {} while 
len( activeDirs ) > 0: currentDir = activeDirs[0] res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True ) activeDirs.remove( currentDir ) if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ): self.log.info( "The supplied directory %s does not exist" % currentDir ) elif not res['OK']: self.log.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) ) else: dirContents = res['Value'] activeDirs.extend( dirContents['SubDirs'] ) allFiles.update( dirContents['Files'] ) self.log.info( "Found %d files" % len( allFiles ) ) return S_OK( allFiles.keys() ) def cleanTransformationLogFiles( self, directory ): ''' clean up transformation logs from directory :directory: :param self: self reference :param str directory: folder name ''' self.log.info( "Removing log files found in the directory %s" % directory ) res = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True ) if not res['OK']: self.log.error( "Failed to remove log files", res['Message'] ) return res self.log.info( "Successfully removed transformation log directory" ) return S_OK() ############################################################################# # # These are the functional methods for archiving and cleaning transformations # def removeTransformationOutput( self, transID ): ''' This just removes any mention of the output data from the catalog and storage ''' self.log.info( "Removing output data for transformation %s" % transID ) res = self.getTransformationDirectories( transID ) if not res['OK']: self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) ) return S_OK() directories = res['Value'] for directory in directories: if not re.search( '/LOG/', directory ): res = self.cleanCatalogContents( directory ) if not res['OK']: return res res = self.cleanStorageContents( directory ) if not res['OK']: return res self.log.info( "Removed directories in the catalog and storage for transformation" ) # Clean ALL the possible remnants found in the metadata catalog res = self.cleanMetadataCatalogFiles( transID ) if not res['OK']: return res self.log.info( "Successfully removed output of transformation %d" % transID ) # Change the status of the transformation to RemovedFiles res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) ) return S_OK() def archiveTransformation( self, transID ): ''' This just removes job from the jobDB and the transformation DB :param self: self reference :param int transID: transformation ID ''' self.log.info( "Archiving transformation %s" % transID ) # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks( transID ) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation( transID ) if not res['OK']: return res self.log.info( "Successfully archived transformation %d" % transID ) # Change the status of the transformation to archived res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of 
transformation %s to Archived" % ( transID ) ) return S_OK() def cleanTransformation( self, transID ): ''' This removes any mention of the supplied transformation ''' self.log.info( "Cleaning transformation %s" % transID ) res = self.getTransformationDirectories( transID ) if not res['OK']: self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) ) return S_OK() directories = res['Value'] # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks( transID ) if not res['OK']: return res # Clean the log files for the jobs for directory in directories: if re.search( '/LOG/', directory ): res = self.cleanTransformationLogFiles( directory ) if not res['OK']: return res res = self.cleanCatalogContents( directory ) if not res['OK']: return res res = self.cleanStorageContents( directory ) if not res['OK']: return res # Clean ALL the possible remnants found in the BK res = self.cleanMetadataCatalogFiles( transID ) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation( transID ) if not res['OK']: return res self.log.info( "Successfully cleaned transformation %d" % transID ) # Change the status of the transformation to deleted res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of transformation %s to Deleted" % ( transID ) ) return S_OK() def cleanMetadataCatalogFiles( self, transID ): ''' wipe out files from catalog ''' res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } ) if not res['OK']: return res fileToRemove = res['Value'] if not fileToRemove: self.log.info( 'No files found for transID %s' % transID ) return S_OK() res = self.replicaManager.removeFile( fileToRemove, force = True ) if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) ) if res['Value']['Failed']: return S_ERROR( "Failed to remove all files found in the metadata catalog" ) self.log.info( "Successfully removed all files found in the BK" ) return S_OK() ############################################################################# # # These are the methods for removing the jobs from the WMS and transformation DB # def cleanTransformationTasks( self, transID ): ''' clean tasks from WMS ''' res = self.__getTransformationExternalIDs( transID ) if not res['OK']: return res externalIDs = res['Value'] if externalIDs: res = self.transClient.getTransformationParameters( transID, ['Type'] ) if not res['OK']: self.log.error( "Failed to determine transformation type" ) return res transType = res['Value'] if transType in [ 'Replication', 'Removal' ]: res = self.__removeRequests( externalIDs ) else: res = self.__removeWMSTasks( externalIDs ) if not res['OK']: return res return S_OK() def __getTransformationExternalIDs( self, transID ): ''' collect all ExternalIDs for transformation :transID: :param self: self reference :param int transID: transforamtion ID ''' res = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } ) if not res['OK']: self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] ) return res externalIDs = [ taskDict['ExternalID'] for taskDict in res["Value"] ] 
self.log.info( "Found %d tasks for transformation" % len( externalIDs ) ) return S_OK( externalIDs ) def __removeRequests( self, requestIDs ): ''' dummy method ''' self.log.error( "Not removing requests but should do" ) return S_OK() def __removeWMSTasks( self, transJobIDs ): ''' wipe out jobs and their requests from the system TODO: should check request status, maybe FTS files as well ??? :param self: self reference :param list trasnJobIDs: job IDs ''' # Prevent 0 job IDs jobIDs = [ int( j ) for j in transJobIDs if int( j ) ] allRemove = True for jobList in breakListIntoChunks( jobIDs, 500 ): res = self.wmsClient.killJob( jobList ) if res['OK']: self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) ) elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ): self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) ) elif "NonauthorizedJobIDs" in res: self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) ) allRemove = False elif "FailedJobIDs" in res: self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) ) allRemove = False res = self.wmsClient.deleteJob( jobList ) if res['OK']: self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) ) elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ): self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) ) elif "NonauthorizedJobIDs" in res: self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) ) allRemove = False elif "FailedJobIDs" in res: self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) ) allRemove = False if not allRemove: return S_ERROR( "Failed to remove all remnants from WMS" ) self.log.info( "Successfully removed all tasks from the WMS" ) if not jobIDs: self.log.info( "JobIDs not present, unable to remove asociated requests." ) return S_OK() res = self.requestClient.getRequestForJobs( jobIDs ) if not res['OK']: self.log.error( "Failed to get requestID for jobs.", res['Message'] ) return res failoverRequests = res['Value'] self.log.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) ) if not failoverRequests: return S_OK() failed = 0 for jobID, requestName in failoverRequests.items(): # Put this check just in case, tasks must have associated jobs if jobID == 0 or jobID == '0': continue res = self.requestClient.deleteRequest( requestName ) if not res['OK']: self.log.error( "Failed to remove request from RequestDB", res['Message'] ) failed += 1 else: self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) ) if failed: self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) ) self.log.info( "Failed to remove %s requests" % failed ) return S_ERROR( "Failed to remove all the request from RequestDB" ) self.log.info( "Successfully removed all the associated failover requests" ) return S_OK()
class TransformationCLI(cmd.Cmd, API): def __init__(self): self.server = TransformationClient() self.indentSpace = 4 cmd.Cmd.__init__(self) def printPair(self, key, value, separator=":"): valueList = value.split("\n") print "%s%s%s %s" % (key, " " * (self.indentSpace - len(key)), separator, valueList[0].strip()) for valueLine in valueList[1:-1]: print "%s %s" % (" " * self.indentSpace, valueLine.strip()) def do_exit(self, args): """ Exits the shell. usage: exit """ sys.exit(0) def do_quit(self, *args): """ Exits the shell. Usage: quit """ sys.exit(0) def do_help(self, args): """ Default version of the help command Usage: help <command> OR use helpall to see description for all commans""" cmd.Cmd.do_help(self, args) # overriting default help command def do_helpall(self, args): """ Shows help information Usage: helpall <command> If no command is specified all commands are shown """ if len(args) == 0: print "\nAvailable commands:\n" attrList = dir(self) attrList.sort() for attribute in attrList: if attribute.find("do_") == 0: self.printPair(attribute[3:], getattr(self, attribute).__doc__[1:]) print "" else: command = args.split()[0].strip() try: obj = getattr(self, "do_%s" % command) except: print "There's no such %s command" % command return self.printPair(command, obj.__doc__[1:]) def do_shell(self, args): """Execute a shell command usage !<shell_command> """ comm = args res = shellCall(0, comm) if res['OK'] and res['Value'][0] == 0: returnCode, stdOut, stdErr = res['Value'] print "%s\n%s" % (stdOut, stdErr) else: print res['Message'] def check_params(self, args, num): """Checks if the number of parameters correct""" argss = string.split(args) length = len(argss) if length < num: print "Error: Number of arguments provided %d less that required %d, please correct." 
% ( length, num) return (False, length) return (argss, length) def check_id_or_name(self, id_or_name): """resolve name or Id by converting type of argument """ if id_or_name.isdigit(): return long(id_or_name) # its look like id return id_or_name def do_setServer(self, args): """ Set the destination server usage: setServer serverURL """ argss = string.split(args) if len(argss) == 0: print "no server provided" self.serverURL = argss[0] self.server.setServer(self.serverURL) #################################################################### # # These are the methods for transformation manipulation # def do_getall(self, args): """Get transformation details usage: getall [Status] [Status] """ oTrans = Transformation() oTrans.setServer(self.serverURL) oTrans.getTransformations(transStatus=string.split(args), printOutput=True) def do_getStatus(self, args): """Get transformation details usage: getStatus <transName|ID> """ argss = string.split(args) if not len(argss) > 0: print "no transformation supplied" return for transName in argss: res = self.server.getTransformation(transName) if not res['OK']: print "Getting status of %s failed: %s" % (transName, res['Message']) else: print "%s: %s" % (transName, res['Value']['Status']) def do_setStatus(self, args): """Set transformation status usage: setStatus <Status> <transName|ID> Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'> """ argss = string.split(args) if not len(argss) > 1: print "transformation and status not supplied" return status = argss[0] transNames = argss[1:] for transName in transNames: res = self.server.setTransformationParameter( transName, 'Status', status) if not res['OK']: print "Setting status of %s failed: %s" % (transName, res['Message']) else: print "%s set to %s" % (transName, status) def do_start(self, args): """Start transformation usage: start <transName|ID> """ argss = string.split(args) if not len(argss) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'Status', 'Active') if not res['OK']: print "Setting Status of %s failed: %s" % (transName, res['Message']) else: res = self.server.setTransformationParameter( transName, 'AgentType', 'Automatic') if not res['OK']: print "Setting AgentType of %s failed: %s" % ( transName, res['Message']) else: print "%s started" % transName def do_stop(self, args): """Stop transformation usage: stop <transID|ID> """ argss = string.split(args) if not len(argss) > 0: print "no transformation supplied" return if not len(argss) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'AgentType', 'Manual') if not res['OK']: print "Stopping of %s failed: %s" % (transName, res['Message']) else: print "%s stopped" % transName def do_flush(self, args): """Flush transformation usage: flush <transName|ID> """ argss = string.split(args) if not len(argss) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'Status', 'Flush') if not res['OK']: print "Flushing of %s failed: %s" % (transName, res['Message']) else: print "%s flushing" % transName def do_get(self, args): """Get transformation definition usage: get <transName|ID> """ argss = string.split(args) if not len(argss) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformation(transName) if not res['OK']: print "Failed to get %s: %s" % (transName, res['Message']) else: 
res['Value'].pop('Body') printDict(res['Value']) def do_getBody(self, args): """Get transformation body usage: getBody <transName|ID> """ argss = string.split(args) if not len(argss) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformation(transName) if not res['OK']: print "Failed to get %s: %s" % (transName, res['Message']) else: print res['Value']['Body'] def do_getFileStat(self, args): """Get transformation file statistics usage: getFileStat <transName|ID> """ argss = string.split(args) if not len(argss) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformationStats(transName) if not res['OK']: print "Failed to get statistics for %s: %s" % (transName, res['Message']) else: res['Value'].pop('Total') printDict(res['Value']) def do_modMask(self, args): """Modify transformation input definition usage: modInput <mask> <transName|ID> """ argss = string.split(args) if not len(argss) > 0: print "no transformation supplied" return mask = argss[0] transNames = argss[1:] for transName in transNames: res = self.server.setTransformationParameter( transName, "FileMask", mask) if not res['OK']: print "Failed to modify input file mask for %s: %s" % ( transName, res['Message']) else: print "Updated %s filemask" % transName def do_getFiles(self, args): """Get files for the transformation (optionally with a given status) usage: getFiles <transName|ID> [Status] [Status] """ argss = string.split(args) if not len(argss) > 0: print "no transformation supplied" return transName = argss[0] status = argss[1:] res = self.server.getTransformation(transName) if not res['OK']: print "Failed to get transformation information: %s" % res[ 'Message'] else: selectDict = {'TransformationID': res['Value']['TransformationID']} if status: selectDict['Status'] = status res = self.server.getTransformationFiles(condDict=selectDict) if not res['OK']: print "Failed to get transformation files: %s" % res['Message'] elif res['Value']: self._printFormattedDictList( res['Value'], ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'], 'LFN', 'LFN') else: print "No files found" def do_getFileStatus(self, args): """Get file(s) status for the given transformation usage: getFileStatus <transName|ID> <lfn> [<lfn>...] 
""" argss = string.split(args) if len(argss) < 2: print "transformation and file not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.getTransformation(transName) if not res['OK']: print "Failed to get transformation information: %s" % res[ 'Message'] else: selectDict = {'TransformationID': res['Value']['TransformationID']} res = self.server.getTransformationFiles(condDict=selectDict) if not res['OK']: print "Failed to get transformation files: %s" % res['Message'] elif res['Value']: filesList = [] for fileDict in res['Value']: if fileDict['LFN'] in lfns: filesList.append(fileDict) if filesList: self._printFormattedDictList(filesList, [ 'LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate' ], 'LFN', 'LFN') else: print "Could not find any LFN in", lfns, "for transformation", transName else: print "No files found" def do_setFileStatus(self, args): """Set file status for the given transformation usage: setFileStatus <transName|ID> <lfn> <status> """ argss = string.split(args) if not len(argss) == 3: print "transformation file and status not supplied" return transName = argss[0] lfn = argss[1] status = argss[2] res = self.server.setFileStatusForTransformation( transName, status, [lfn]) if not res['OK']: print "Failed to update file status: %s" % res['Message'] else: print "Updated file status to %s" % status def do_resetFile(self, args): """Reset file status for the given transformation usage: setFileStatus <transName|ID> <lfn> """ argss = string.split(args) if not len(argss) > 1: print "transformation and file(s) not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns) if not res['OK']: print "Failed to reset file status: %s" % res['Message'] else: print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns) #################################################################### # # These are the methods for file manipulation # def do_addDirectory(self, args): """Add files from the given catalog directory usage: addDirectory <directory> [directory] """ argss = string.split(args) if not len(argss) > 0: print "no directory supplied" return for directory in argss: res = self.server.addDirectory(directory, force=True) if not res['OK']: print 'failed to add directory %s: %s' % (directory, res['Message']) else: print 'added %s files for %s' % (res['Value'], directory) def do_replicas(self, args): """ Get replicas for <path> usage: replicas <lfn> [lfn] """ argss = string.split(args) if not len(argss) > 0: print "no files supplied" return res = self.server.getReplicas(argss) if not res['OK']: print "failed to get any replica information: %s" % res['Message'] return for lfn in sortList(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to get replica information for %s: %s" % (lfn, error) for lfn in sortList(res['Value']['Successful'].keys()): ses = sortList(res['Value']['Successful'][lfn].keys()) outStr = "%s :" % lfn.ljust(100) for se in ses: outStr = "%s %s" % (outStr, se.ljust(15)) print outStr def do_addFile(self, args): """Add new files to transformation DB usage: addFile <lfn> [lfn] """ argss = string.split(args) if not len(argss) > 0: print "no files supplied" return lfnDict = {} for lfn in argss: lfnDict[lfn] = { 'PFN': 'IGNORED-PFN', 'SE': 'IGNORED-SE', 'Size': 0, 'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM' } res = self.server.addFile(lfnDict, force=True) if not res['OK']: print "failed to add any files: %s" % res['Message'] return for lfn 
in sortList(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to add %s: %s" % (lfn, error) for lfn in sortList(res['Value']['Successful'].keys()): print "added %s" % lfn def do_removeFile(self, args): """Remove file from transformation DB usage: removeFile <lfn> [lfn] """ argss = string.split(args) if not len(argss) > 0: print "no files supplied" return res = self.server.removeFile(argss) if not res['OK']: print "failed to remove any files: %s" % res['Message'] return for lfn in sortList(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to remove %s: %s" % (lfn, error) for lfn in sortList(res['Value']['Successful'].keys()): print "removed %s" % lfn def do_addReplica(self, args): """ Add new replica to the transformation DB usage: addReplica <lfn> <se> """ argss = string.split(args) if not len(argss) == 2: print "no file info supplied" return lfn = argss[0] se = argss[1] lfnDict = {} lfnDict[lfn] = { 'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0, 'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM' } res = self.server.addReplica(lfnDict, force=True) if not res['OK']: print "failed to add replica: %s" % res['Message'] return for lfn in sortList(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to add replica: %s" % (error) for lfn in sortList(res['Value']['Successful'].keys()): print "added %s" % lfn def do_removeReplica(self, args): """Remove replica from the transformation DB usage: removeReplica <lfn> <se> """ argss = string.split(args) if not len(argss) == 2: print "no file info supplied" return lfn = argss[0] se = argss[1] lfnDict = {} lfnDict[lfn] = { 'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0, 'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM' } res = self.server.removeReplica(lfnDict) if not res['OK']: print "failed to remove replica: %s" % res['Message'] return for lfn in sortList(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to remove replica: %s" % (error) for lfn in sortList(res['Value']['Successful'].keys()): print "removed %s" % lfn def do_setReplicaStatus(self, args): """Set replica status, usually used to mark a replica Problematic usage: setReplicaStatus <lfn> <status> <se> """ argss = string.split(args) if not len(argss) > 2: print "no file info supplied" return lfn = argss[0] status = argss[1] se = argss[2] lfnDict = {} lfnDict[lfn] = { 'Status': status, 'PFN': 'IGNORED-PFN', 'SE': se, 'Size': 0, 'GUID': 'IGNORED-GUID', 'Checksum': 'IGNORED-CHECKSUM' } res = self.server.setReplicaStatus(lfnDict) if not res['OK']: print "failed to set replica status: %s" % res['Message'] return for lfn in sortList(res['Value']['Failed'].keys()): error = res['Value']['Failed'][lfn] print "failed to set replica status: %s" % (error) for lfn in sortList(res['Value']['Successful'].keys()): print "updated replica status %s" % lfn
class TransformationCleaningAgent( AgentModule ): ''' .. class:: TransformationCleaningAgent :param ReplicaManger replicaManager: ReplicaManager instance :param TransfromationClient transClient: TransfromationClient instance :param RequestClient requestClient: RequestClient instance :param FileCatalogClient metadataClient: FileCatalogClient instance ''' def __init__( self, *args, **kwargs ): ''' c'tor ''' AgentModule.__init__( self, *args, **kwargs ) # # replica manager self.replicaManager = ReplicaManager() # # transformation client self.transClient = TransformationClient() # # wms client self.wmsClient = WMSClient() # # request client self.requestClient = RequestClient() # # file catalog clinet self.metadataClient = FileCatalogClient() # # placeholders for CS options # # transformations types self.transformationTypes = None # # directory locations self.directoryLocations = None # # transformation metadata self.transfidmeta = None # # archive periof in days self.archiveAfter = None # # active SEs self.activeStorages = None # # transformation log SEs self.logSE = None # # enable/disable execution self.enableFlag = None def initialize( self ): ''' agent initialisation reading and setting confing opts :param self: self reference ''' # # shifter proxy self.am_setOption( 'shifterProxy', 'DataManager' ) # # transformations types agentTSTypes = self.am_getOption( 'TransformationTypes', [] ) if agentTSTypes: self.transformationTypes = sortList( agentTSTypes ) else: dataProc = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] ) dataManip = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] ) self.transformationTypes = sortList( dataProc + dataManip ) self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) ) # # directory locations self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB', 'MetadataCatalog' ] ) ) self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) ) # # transformation metadata self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" ) self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta ) # # archive periof in days self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 ) # days self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter ) # # active SEs self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) ) self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) ) # # transformation log SEs self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' ) self.log.info( "Will remove logs found on storage element: %s" % self.logSE ) # # enable/disable execution, should be using CS option Status?? with default value as 'Active'?? 
self.enableFlag = self.am_getOption( 'EnableFlag', 'True' ) return S_OK() ############################################################################# def execute( self ): ''' execution in one agent's cycle :param self: self reference ''' self.enableFlag = self.am_getOption( 'EnableFlag', 'True' ) if not self.enableFlag == 'True': self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' ) return S_OK( 'Disabled via CS flag' ) # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files res = self.transClient.getTransformations( { 'Status' : 'Cleaning', 'Type' : self.transformationTypes } ) if res['OK']: for transDict in res['Value']: # # if transformation is of type `Replication` or `Removal`, there is nothing to clean. # # We just archive if transDict[ 'Type' ] in [ 'Replication', 'Removal' ]: res = self.archiveTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) else: res = self.cleanTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles', 'Type' : self.transformationTypes} ) if res['OK']: for transDict in res['Value']: res = self.removeTransformationOutput( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) # # Obtain the transformations in Completed status and archive if inactive for X days olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter ) res = self.transClient.getTransformations( { 'Status' : 'Completed', 'Type' : self.transformationTypes }, older = olderThanTime, timeStamp = 'LastUpdate' ) if res['OK']: for transDict in res['Value']: res = self.archiveTransformation( transDict['TransformationID'] ) if not res['OK']: self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'], res['Message'] ) ) else: self.log.error( "Could not get the transformations" ) return S_OK() ############################################################################# # # Get the transformation directories for checking # def getTransformationDirectories( self, transID ): ''' get the directories for the supplied transformation from the transformation system :param self: self reference :param int transID: transformation ID ''' directories = [] if 'TransformationDB' in self.directoryLocations: res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] ) if not res['OK']: self.log.error( "Failed to obtain transformation directories", res['Message'] ) return res transDirectories = res['Value'].splitlines() directories = self._addDirs( transID, transDirectories, directories ) if 'MetadataCatalog' in self.directoryLocations: res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} ) if not res['OK']: self.log.error( "Failed to obtain metadata catalog directories", res['Message'] ) return res transDirectories = res['Value'] directories = self._addDirs( transID, transDirectories, directories ) if not directories: self.log.info( "No output directories found" ) directories = sortList( directories ) return S_OK( directories ) 
@classmethod def _addDirs( self, transID, newDirs, existingDirs ): ''' append uniqe :newDirs: list to :existingDirs: list :param self: self reference :param int transID: transformationID :param list newDirs: src list of paths :param list existingDirs: dest list of paths ''' for folder in newDirs: transStr = str( transID ).zfill( 8 ) if re.search( transStr, str( folder ) ): if not folder in existingDirs: existingDirs.append( folder ) return existingDirs ############################################################################# # # These are the methods for performing the cleaning of catalogs and storage # def cleanStorageContents( self, directory ): ''' delete lfn dir from all active SE :param self: self reference :param sre directory: folder name ''' for storageElement in self.activeStorages: res = self.__removeStorageDirectory( directory, storageElement ) if not res['OK']: return res return S_OK() def __removeStorageDirectory( self, directory, storageElement ): ''' wipe out all contents from :directory: at :storageElement: :param self: self reference :param str directory: path :param str storageElement: SE name ''' self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) ) res = self.replicaManager.getPfnForLfn( [directory], storageElement ) if not res['OK']: self.log.error( "Failed to get PFN for directory", res['Message'] ) return res for directory, error in res['Value']['Failed'].items(): self.log.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, error ) ) if res['Value']['Failed']: return S_ERROR( 'Failed to obtain directory PFN from LFNs' ) storageDirectory = res['Value']['Successful'].values()[0] res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True ) if not res['OK']: self.log.error( "Failed to obtain existance of directory", res['Message'] ) return res exists = res['Value'] if not exists: self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) ) return S_OK() res = self.replicaManager.removeStorageDirectory( storageDirectory, storageElement, recursive = True, singleDirectory = True ) if not res['OK']: self.log.error( "Failed to remove storage directory", res['Message'] ) return res self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'], directory, storageElement ) ) return S_OK() def cleanCatalogContents( self, directory ): ''' wipe out everything from catalog under folder :directory: :param self: self reference :params str directory: folder name ''' res = self.__getCatalogDirectoryContents( [directory] ) if not res['OK']: return res filesFound = res['Value'] if not filesFound: return S_OK() self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) ) res = self.replicaManager.removeFile( filesFound ) if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) ) if res['Value']['Failed']: return S_ERROR( "Failed to remove all files found in the catalog" ) return S_OK() def __getCatalogDirectoryContents( self, directories ): ''' get catalog contents under paths :directories: :param self: self reference :param list directories: list of paths in catalog ''' self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) ) for directory in directories: self.log.info( directory ) activeDirs = directories allFiles = {} while len( activeDirs ) 
> 0: currentDir = activeDirs[0] res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True ) activeDirs.remove( currentDir ) if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ): self.log.info( "The supplied directory %s does not exist" % currentDir ) elif not res['OK']: self.log.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) ) else: dirContents = res['Value'] activeDirs.extend( dirContents['SubDirs'] ) allFiles.update( dirContents['Files'] ) self.log.info( "Found %d files" % len( allFiles ) ) return S_OK( allFiles.keys() ) def cleanTransformationLogFiles( self, directory ): ''' clean up transformation logs from directory :directory: :param self: self reference :param str directory: folder name ''' self.log.info( "Removing log files found in the directory %s" % directory ) res = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True ) if not res['OK']: self.log.error( "Failed to remove log files", res['Message'] ) return res self.log.info( "Successfully removed transformation log directory" ) return S_OK() ############################################################################# # # These are the functional methods for archiving and cleaning transformations # def removeTransformationOutput( self, transID ): ''' This just removes any mention of the output data from the catalog and storage ''' self.log.info( "Removing output data for transformation %s" % transID ) res = self.getTransformationDirectories( transID ) if not res['OK']: self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) ) return S_OK() directories = res['Value'] for directory in directories: if not re.search( '/LOG/', directory ): res = self.cleanCatalogContents( directory ) if not res['OK']: return res res = self.cleanStorageContents( directory ) if not res['OK']: return res self.log.info( "Removed directories in the catalog and storage for transformation" ) # Clean ALL the possible remnants found in the metadata catalog res = self.cleanMetadataCatalogFiles( transID, directories ) if not res['OK']: return res self.log.info( "Successfully removed output of transformation %d" % transID ) # Change the status of the transformation to RemovedFiles res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) ) return S_OK() def archiveTransformation( self, transID ): ''' This just removes job from the jobDB and the transformation DB :param self: self reference :param int transID: transformation ID ''' self.log.info( "Archiving transformation %s" % transID ) # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks( transID ) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation( transID ) if not res['OK']: return res self.log.info( "Successfully archived transformation %d" % transID ) # Change the status of the transformation to archived res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of 
transformation %s to Archived" % ( transID ) ) return S_OK() def cleanTransformation( self, transID ): ''' This removes any mention of the supplied transformation ''' self.log.info( "Cleaning transformation %s" % transID ) res = self.getTransformationDirectories( transID ) if not res['OK']: self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) ) return S_OK() directories = res['Value'] # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks( transID ) if not res['OK']: return res # Clean the log files for the jobs for directory in directories: if re.search( '/LOG/', directory ): res = self.cleanTransformationLogFiles( directory ) if not res['OK']: return res res = self.cleanCatalogContents( directory ) if not res['OK']: return res res = self.cleanStorageContents( directory ) if not res['OK']: return res # Clean ALL the possible remnants found in the BK res = self.cleanMetadataCatalogFiles( transID, directories ) if not res['OK']: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation( transID ) if not res['OK']: return res self.log.info( "Successfully cleaned transformation %d" % transID ) # Change the status of the transformation to deleted res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' ) if not res['OK']: self.log.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] ) return res self.log.info( "Updated status of transformation %s to Deleted" % ( transID ) ) return S_OK() def cleanMetadataCatalogFiles( self, transID ): ''' wipe out files from catalog ''' res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } ) if not res['OK']: return res fileToRemove = res['Value'] if not fileToRemove: self.log.info( 'No files found for transID %s' % transID ) return S_OK() res = self.replicaManager.removeFile( fileToRemove ) if not res['OK']: return res for lfn, reason in res['Value']['Failed'].items(): self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) ) if res['Value']['Failed']: return S_ERROR( "Failed to remove all files found in the metadata catalog" ) self.log.info( "Successfully removed all files found in the BK" ) return S_OK() ############################################################################# # # These are the methods for removing the jobs from the WMS and transformation DB # def cleanTransformationTasks( self, transID ): ''' clean tasks from WMS ''' res = self.__getTransformationExternalIDs( transID ) if not res['OK']: return res externalIDs = res['Value'] if externalIDs: res = self.transClient.getTransformationParameters( transID, ['Type'] ) if not res['OK']: self.log.error( "Failed to determine transformation type" ) return res transType = res['Value'] if transType in [ 'Replication', 'Removal' ]: res = self.__removeRequests( externalIDs ) else: res = self.__removeWMSTasks( externalIDs ) if not res['OK']: return res return S_OK() def __getTransformationExternalIDs( self, transID ): ''' collect all ExternalIDs for transformation :transID: :param self: self reference :param int transID: transforamtion ID ''' res = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } ) if not res['OK']: self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] ) return res externalIDs = [ taskDict['ExternalID'] for taskDict in res["Value"] ] 
self.log.info( "Found %d tasks for transformation" % len( externalIDs ) ) return S_OK( externalIDs ) def __removeRequests( self, requestIDs ): ''' dummy method ''' self.log.error( "Not removing requests but should do" ) return S_OK() def __removeWMSTasks( self, transJobIDs ): ''' wipe out jobs and their requests from the system TODO: should check request status, maybe FTS files as well ??? :param self: self reference :param list trasnJobIDs: job IDs ''' # Prevent 0 job IDs jobIDs = [ int( j ) for j in transJobIDs if int( j ) ] allRemove = True for jobList in breakListIntoChunks( jobIDs, 500 ): res = self.wmsClient.killJob( jobList ) if res['OK']: self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) ) elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ): self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) ) elif "NonauthorizedJobIDs" in res: self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) ) allRemove = False elif "FailedJobIDs" in res: self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) ) allRemove = False res = self.wmsClient.deleteJob( jobList ) if res['OK']: self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) ) elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ): self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) ) elif "NonauthorizedJobIDs" in res: self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) ) allRemove = False elif "FailedJobIDs" in res: self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) ) allRemove = False if not allRemove: return S_ERROR( "Failed to remove all remnants from WMS" ) self.log.info( "Successfully removed all tasks from the WMS" ) if not jobIDs: self.log.info( "JobIDs not present, unable to remove asociated requests." ) return S_OK() res = self.requestClient.getRequestForJobs( jobIDs ) if not res['OK']: self.log.error( "Failed to get requestID for jobs.", res['Message'] ) return res failoverRequests = res['Value'] self.log.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) ) if not failoverRequests: return S_OK() failed = 0 for jobID, requestName in failoverRequests.items(): # Put this check just in case, tasks must have associated jobs if jobID == 0 or jobID == '0': continue res = self.requestClient.deleteRequest( requestName ) if not res['OK']: self.log.error( "Failed to remove request from RequestDB", res['Message'] ) failed += 1 else: self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) ) if failed: self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) ) self.log.info( "Failed to remove %s requests" % failed ) return S_ERROR( "Failed to remove all the request from RequestDB" ) self.log.info( "Successfully removed all the associated failover requests" ) return S_OK()
class ProductionJob(Job): #pylint: disable=too-many-public-methods, too-many-instance-attributes """ Production job class. Suitable for CLIC studies. Need to sub class and overload for other clients. """ def __init__(self, script = None): super(ProductionJob, self).__init__( script ) self.prodVersion = __RCSID__ self.dryrun = False self.created = False self.checked = False self.call_finalization = False self.finalsdict = {} self.transfid = 0 self.type = 'Production' self.csSection = '/Production/Defaults' self.ops = Operations() self.fc = FileCatalogClient() self.trc = TransformationClient() self.defaultProdID = '12345' self.defaultProdJobID = '12345' self.jobFileGroupSize = 1 self.nbtasks = 1 self.slicesize =0 self.basename = '' self.basepath = self.ops.getValue('/Production/CLIC/BasePath','/ilc/prod/clic/') self.evttype = '' self.datatype = '' self.energycat = '' self.detector = '' self.currtrans = None self.description = '' self.finalpaths = [] self.finalMetaDict = defaultdict( dict ) self.prodMetaDict = {} self.finalMetaDictNonSearch = {} self.metadict_external = {} self.outputStorage = '' self.proxyinfo = getProxyInfo() self.inputdataquery = False self.inputBKSelection = {} self.plugin = 'Standard' self.prodGroup = '' self.prodTypes = ['MCGeneration', 'MCSimulation', 'Test', 'MCReconstruction', 'MCReconstruction_Overlay', 'Merge', 'Split', 'MCGeneration_ILD', 'MCSimulation_ILD', 'MCReconstruction_ILD', 'MCReconstruction_Overlay_ILD', 'Split_ILD' ] self.prodparameters = {} self.prodparameters['NbInputFiles'] = 1 self.prodparameters['nbevts'] = 0 #self.prodparameters["SWPackages"] = '' self._addParameter(self.workflow, "IS_PROD", 'JDL', True, "This job is a production job") if not script: self.__setDefaults() self._recBasePaths = {} self.maxFCFoldersToCheck = 100000 ############################################################################# def __setDefaults(self): """Sets some default parameters. """ self.setPlatform(self.ops.getValue('%s/Platform' % (self.csSection), 'x86_64-slc5-gcc43-opt')) self.setCPUTime('300000') self.setLogLevel('verbose') self.setJobGroup('@{PRODUCTION_ID}') #version control self._setParameter('productionVersion', 'string', self.prodVersion, 'ProdAPIVersion') #General workflow parameters self._setParameter('PRODUCTION_ID', 'string', self.defaultProdID.zfill(8), 'ProductionID') self._setParameter('JOB_ID', 'string', self.defaultProdJobID.zfill(8), 'ProductionJobID') self._setParameter('Priority', 'JDL', '1', 'Priority') self._setParameter('emailAddress', 'string', '*****@*****.**', 'CrashEmailAddress') def _setParameter(self, name, parameterType, parameterValue, description): """Set parameters checking in CS in case some defaults need to be changed. 
""" if self.ops.getValue('%s/%s' % (self.csSection, name), ''): LOG.debug('Setting %s from CS defaults = %s' % (name, self.ops.getValue('%s/%s' % (self.csSection, name)))) self._addParameter(self.workflow, name, parameterType, self.ops.getValue('%s/%s' % (self.csSection, name), 'default'), description) else: LOG.debug('Setting parameter %s = %s' % (name, parameterValue)) self._addParameter(self.workflow, name, parameterType, parameterValue, description) def setConfig(self,version): """ Define the Configuration package to obtain """ appName = 'ILDConfig' self._addSoftware(appName.lower(), version) self.prodparameters['ILDConfigVersion'] = version self._addParameter( self.workflow, 'ILDConfigPackage', 'JDL', appName+version, 'ILDConfig package' ) return S_OK() def setClicConfig(self, version): """Define the ClicConfig package to obtain.""" return self.setConfigPackage('ClicConfig', version) def setConfigPackage(self, appName, version): """Define the config package to obtain.""" self._addSoftware(appName.lower(), version) self._addParameter(self.workflow, appName + 'Package', 'JDL', appName + version, appName + 'package') self.prodparameters[appName + 'Version'] = version return S_OK() def setDryRun(self, run): """ In case one wants to get all the info as if the prod was being submitted """ self.dryrun = run ############################################################################# def setProdGroup(self, group): """ Sets a user defined tag for the production as appears on the monitoring page """ self.prodGroup = group ############################################################################# def setProdPlugin(self, plugin): """ Sets the plugin to be used to creating the production jobs """ self.plugin = plugin ############################################################################# def setJobFileGroupSize(self, files): """ Sets the number of files to be input to each job created. """ if self.checked: return self._reportError("This input is needed at the beginning of the production definition: it is \ needed for total number of evts.") self.jobFileGroupSize = files self.prodparameters['NbInputFiles'] = files def setNbEvtsPerSlice(self,nbevts): """ Define the number of events in a slice. """ self.slicesize = nbevts ############################################################################# def setProdType(self, prodType): """Set prod type. """ if prodType not in self.prodTypes: raise TypeError('Prod must be one of %s' % (', '.join(self.prodTypes))) self.setType(prodType) ############################################################################# def setWorkflowName(self, name): """Set workflow name. """ self.workflow.setName(name) self.name = name ############################################################################# def setWorkflowDescription(self, desc): """Set workflow name. """ self.workflow.setDescription(desc) ############################################################################# def createWorkflow(self): """ Create XML for local testing. """ name = '%s.xml' % self.name if os.path.exists(name): shutil.move(name,'%s.backup' % name) self.workflow.toXMLFile(name) ############################################################################# def setOutputSE(self, outputse): """ Define where the output file(s) will go. 
""" self.outputStorage = outputse return S_OK() ############################################################################# def setInputDataQuery(self, metadata): """ Define the input data query needed """ retMetaKey = self._checkMetaKeys( metadata.keys() ) if not retMetaKey['OK']: return retMetaKey if "ProdID" not in metadata: return self._reportError("Input metadata dictionary must contain at least a key 'ProdID' as reference") retDirs = self._checkFindDirectories( metadata ) if not retDirs['OK']: return retDirs dirs = retDirs['Value'].values() for mdir in dirs[:self.maxFCFoldersToCheck]: LOG.notice("Directory: %s" % mdir) res = self.fc.getDirectoryUserMetadata(mdir) if not res['OK']: return self._reportError("Error looking up the catalog for directory metadata") compatmeta = res['Value'] compatmeta.update(metadata) if 'EvtType' in compatmeta: self.evttype = JobHelpers.getValue( compatmeta['EvtType'], str, basestring ) else: return self._reportError("EvtType is not in the metadata, it has to be!") if 'NumberOfEvents' in compatmeta: self.nbevts = JobHelpers.getValue( compatmeta['NumberOfEvents'], int, None ) self.basename = self.evttype LOG.notice("MetaData: %s" % compatmeta) LOG.notice("MetaData: %s" % metadata) if "Energy" in compatmeta: self.energycat = JobHelpers.getValue( compatmeta["Energy"], str, (int, long, basestring) ) if self.energycat.count("tev"): self.energy = Decimal("1000.") * Decimal(self.energycat.split("tev")[0]) elif self.energycat.count("gev"): self.energy = Decimal("1.") * Decimal(self.energycat.split("gev")[0]) else: self.energy = Decimal("1.") * Decimal(self.energycat) gendata = False if 'Datatype' in compatmeta: self.datatype = JobHelpers.getValue( compatmeta['Datatype'], str, basestring ) if self.datatype == 'gen': gendata = True if "DetectorType" in compatmeta and not gendata: self.detector = JobHelpers.getValue( compatmeta["DetectorType"], str, basestring ) self.inputBKSelection = metadata self.inputdataquery = True self.prodparameters['nbevts'] = self.nbevts self.prodparameters["FCInputQuery"] = self.inputBKSelection return S_OK() def setDescription(self, desc): """ Set the production's description :param str desc: Description """ self.description = desc return S_OK() def getBasePath(self): """ Return the base path. Updated by :any:`setInputDataQuery`. 
""" return self.basepath def addFinalization(self, uploadData = False, registerData = False, uploadLog = False, sendFailover=False): """ Add finalization step :param bool uploadData: Upload or not the data to the storage :param bool uploadLog: Upload log file to storage (currently only available for admins, thus add them to OutputSandbox) :param bool sendFailover: Send Failover requests, and declare files as processed or unused in transfDB :param bool registerData: Register data in the file catalog """ #TODO: Do the registration only once, instead of once for each job self.call_finalization = True self.finalsdict['uploadData'] = uploadData self.finalsdict['registerData'] = registerData self.finalsdict['uploadLog'] = uploadLog self.finalsdict['sendFailover'] = sendFailover def _addRealFinalization(self): """ This is called at creation: now that the workflow is created at the last minute, we need to add this also at the last minute """ importLine = 'from ILCDIRAC.Workflow.Modules.<MODULE> import <MODULE>' dataUpload = ModuleDefinition('UploadOutputData') dataUpload.setDescription('Uploads the output data') self._addParameter(dataUpload, 'enable', 'bool', False, 'EnableFlag') body = importLine.replace('<MODULE>', 'UploadOutputData') dataUpload.setBody(body) failoverRequest = ModuleDefinition('FailoverRequest') failoverRequest.setDescription('Sends any failover requests') self._addParameter(failoverRequest, 'enable', 'bool', False, 'EnableFlag') body = importLine.replace('<MODULE>', 'FailoverRequest') failoverRequest.setBody(body) registerdata = ModuleDefinition('RegisterOutputData') registerdata.setDescription('Module to add in the metadata catalog the relevant info about the files') self._addParameter(registerdata, 'enable', 'bool', False, 'EnableFlag') body = importLine.replace('<MODULE>', 'RegisterOutputData') registerdata.setBody(body) logUpload = ModuleDefinition('UploadLogFile') logUpload.setDescription('Uploads the output log files') self._addParameter(logUpload, 'enable', 'bool', False, 'EnableFlag') body = importLine.replace('<MODULE>', 'UploadLogFile') logUpload.setBody(body) errorReport = ModuleDefinition('ReportErrors') errorReport.setDescription('Reports errors at the end') body = importLine.replace('<MODULE>', 'ReportErrors') errorReport.setBody(body) finalization = StepDefinition('Job_Finalization') finalization.addModule(dataUpload) up = finalization.createModuleInstance('UploadOutputData', 'dataUpload') up.setValue("enable", self.finalsdict['uploadData']) finalization.addModule(registerdata) ro = finalization.createModuleInstance('RegisterOutputData', 'RegisterOutputData') ro.setValue("enable", self.finalsdict['registerData']) finalization.addModule(logUpload) ul = finalization.createModuleInstance('UploadLogFile', 'logUpload') ul.setValue("enable", self.finalsdict['uploadLog']) finalization.addModule(failoverRequest) fr = finalization.createModuleInstance('FailoverRequest', 'failoverRequest') fr.setValue("enable", self.finalsdict['sendFailover']) finalization.addModule(errorReport) fr = finalization.createModuleInstance('ReportErrors', 'reportErrors') self.workflow.addStep(finalization) self.workflow.createStepInstance('Job_Finalization', 'finalization') return S_OK() def createProduction(self, name = None): """ Create production. 
""" if not self.proxyinfo['OK']: return S_ERROR("Not allowed to create production, you need a production proxy.") if 'groupProperties' not in self.proxyinfo['Value']: return S_ERROR("Could not determine groupProperties, you do not have the right proxy.") groupProperties = self.proxyinfo['Value']['groupProperties'] if 'ProductionManagement' not in groupProperties: return S_ERROR("Not allowed to create production, you need a production proxy.") if self.created: return S_ERROR("Production already created.") ###We need to add the applications to the workflow res = self._addToWorkflow() if not res['OK']: return res if self.call_finalization: self._addRealFinalization() workflowName = self.workflow.getName() fileName = '%s.xml' % workflowName LOG.verbose('Workflow XML file name is:', '%s' % fileName) try: self.createWorkflow() except Exception as x: LOG.error("Exception creating workflow", repr(x)) return S_ERROR('Could not create workflow') with open(fileName, 'r') as oFile: workflowXML = oFile.read() if not name: name = workflowName res = self.trc.getTransformationStats(name) if res['OK']: return self._reportError("Transformation with name %s already exists! Cannot proceed." % name) ###Create Tranformation Trans = Transformation() Trans.setTransformationName(name) Trans.setDescription(self.description) Trans.setLongDescription(self.description) Trans.setType(self.type) self.prodparameters['JobType'] = self.type Trans.setPlugin(self.plugin) if self.inputdataquery: Trans.setGroupSize(self.jobFileGroupSize) Trans.setTransformationGroup(self.prodGroup) Trans.setBody(workflowXML) if not self.slicesize: Trans.setEventsPerTask(self.jobFileGroupSize * self.nbevts) else: Trans.setEventsPerTask(self.slicesize) self.currtrans = Trans if self.dryrun: LOG.notice('Would create prod called', name) self.transfid = 12345 else: res = Trans.addTransformation() if not res['OK']: LOG.error(res['Message']) return res self.transfid = Trans.getTransformationID()['Value'] if self.inputBKSelection: res = self.applyInputDataQuery() if not self.dryrun: Trans.setAgentType("Automatic") Trans.setStatus("Active") finals = [] for finalpaths in self.finalpaths: finalpaths = finalpaths.rstrip("/") finalpaths += "/"+str(self.transfid).zfill(8) finals.append(finalpaths) self.finalMetaDict[finalpaths].update( { "ProdID": self.transfid } ) self.finalMetaDict[finalpaths].update( self.prodMetaDict ) # if 'ILDConfigVersion' in self.prodparameters: # self.finalMetaDict[finalpaths].update({"ILDConfig":self.prodparameters['ILDConfigVersion']}) if self.nbevts: self.finalMetaDict[finalpaths].update({'NumberOfEvents' : self.jobFileGroupSize * self.nbevts}) self.finalpaths = finals self.created = True return S_OK() def setNbOfTasks(self, nbtasks): """ Define the number of tasks you want. Useful for generation jobs. """ if not self.currtrans: LOG.error("Not transformation defined earlier") return S_ERROR("No transformation defined") if self.inputBKSelection and self.plugin not in ['Limited', 'SlicedLimited']: LOG.error('Metadata selection activated, should not specify the number of jobs') return S_ERROR() self.nbtasks = nbtasks self.currtrans.setMaxNumberOfTasks(self.nbtasks) #pylint: disable=E1101 return S_OK() def applyInputDataQuery(self, metadata = None, prodid = None): """ Tell the production to update itself using the metadata query specified, i.e. submit new jobs if new files are added corresponding to same query. 
""" if not self.transfid and self.currtrans: self.transfid = self.currtrans.getTransformationID()['Value'] #pylint: disable=E1101 elif prodid: self.transfid = prodid if not self.transfid: LOG.error("Not transformation defined earlier") return S_ERROR("No transformation defined") if metadata: self.inputBKSelection = metadata if not self.dryrun: res = self.trc.createTransformationInputDataQuery(self.transfid, self.inputBKSelection) if not res['OK']: return res else: LOG.notice("Would use %s as metadata query for production" % str(self.inputBKSelection)) return S_OK() def addMetadataToFinalFiles(self, metadict): """ Add additionnal non-query metadata """ self.metadict_external = metadict return S_OK() def finalizeProd(self, prodid = None, prodinfo = None): """ Finalize definition: submit to Transformation service and register metadata """ currtrans = 0 if self.currtrans: if not self.dryrun: currtrans = self.currtrans.getTransformationID()['Value'] #pylint: disable=E1101 else: currtrans = 12345 if prodid: currtrans = prodid if not currtrans: LOG.error("Not transformation defined earlier") return S_ERROR("No transformation defined") if prodinfo: self.prodparameters = prodinfo info = [] info.append('%s Production %s has following parameters:\n' % (self.prodparameters['JobType'], currtrans)) if "Process" in self.prodparameters: info.append('- Process %s' % self.prodparameters['Process']) if "Energy" in self.prodparameters: info.append('- Energy %s GeV' % self.prodparameters["Energy"]) if not self.slicesize: self.prodparameters['nbevts'] = self.jobFileGroupSize * self.nbevts else: self.prodparameters['nbevts'] = self.slicesize if self.prodparameters['nbevts']: info.append("- %s events per job" % (self.prodparameters['nbevts'])) if self.prodparameters.get('lumi', False): info.append(' corresponding to a luminosity %s fb' % (self.prodparameters['lumi'] * \ self.prodparameters['NbInputFiles'])) if 'FCInputQuery' in self.prodparameters: info.append('Using InputDataQuery :') for key, val in self.prodparameters['FCInputQuery'].iteritems(): info.append(' %s = %s' % (key, val)) if "SWPackages" in self.prodparameters: info.append('- SW packages %s' % self.prodparameters["SWPackages"]) if "SoftwareTag" in self.prodparameters: info.append('- SW tags %s' % self.prodparameters["SoftwareTag"]) if "ILDConfigVersion" in self.prodparameters: info.append('- ILDConfig %s' % self.prodparameters['ILDConfigVersion']) if 'ClicConfigVersion' in self.prodparameters: info.append('- ClicConfig %s' % self.prodparameters['ClicConfigVersion'] ) if 'extraCLIArguments' in self.prodparameters: info.append('- ExtraCLIArguments %s' % self.prodparameters['extraCLIArguments'] ) # as this is the very last call all applications are registered, so all software packages are known #add them the the metadata registration for finalpath in self.finalpaths: if finalpath not in self.finalMetaDictNonSearch: self.finalMetaDictNonSearch[finalpath] = {} if "SWPackages" in self.prodparameters: self.finalMetaDictNonSearch[finalpath]["SWPackages"] = self.prodparameters["SWPackages"] if self.metadict_external: self.finalMetaDictNonSearch[finalpath].update(self.metadict_external) info.append('- Registered metadata: ') for path, metadata in sorted( self.finalMetaDict.iteritems() ): info.append(' %s = %s' % (path, metadata)) info.append('- Registered non searchable metadata: ') for path, metadata in sorted( self.finalMetaDictNonSearch.iteritems() ): info.append(' %s = %s' % (path, metadata)) infoString = '\n'.join(info) 
self.prodparameters['DetailedInfo'] = infoString for name, val in self.prodparameters.iteritems(): result = self._setProdParameter(currtrans, name, val) if not result['OK']: LOG.error(result['Message']) res = self._registerMetadata() if not res['OK']: LOG.error('Could not register the following directories:', res['Message']) return res return S_OK() def _createDirectory(self, path, failed, mode=0o775): """Create the directory at path if it does not exist. :param str path: path to check :param list failed: list of failed paths :param int mode: mode to set for directory """ exists = returnSingleResult(self.fc.isDirectory(path)) if exists['OK'] and exists['Value']: LOG.verbose('Directory already exists:', path) return S_OK() result = returnSingleResult(self.fc.createDirectory(path)) if not result['OK']: LOG.error('Failed to create directory:', '%s: %s' % (path, result['Message'])) failed[path].append(result['Message']) return S_ERROR() LOG.verbose('Successfully created directory:', path) res = self.fc.changePathMode({path: mode}, False) if not res['OK']: LOG.error(res['Message']) failed[path].append(res['Message']) return S_ERROR() LOG.verbose('Successfully changed mode:', path) return S_OK() def _checkMetadata(self, path, metaCopy): """Get existing metadata, if it is the same do not set it again, otherwise return error.""" existingMetadata = self.fc.getDirectoryUserMetadata(path.rstrip('/')) if not existingMetadata['OK']: return S_OK() failure = False for key, value in existingMetadata['Value'].iteritems(): if key in metaCopy and metaCopy[key] != value: LOG.error('Metadata values for folder %s disagree for key %s: Existing(%r), new(%r)' % (path, key, value, metaCopy[key])) failure = True elif key in metaCopy and metaCopy[key] == value: LOG.verbose('Meta entry is unchanged', '%s = %s' % (key, value)) metaCopy.pop(key, None) if failure: return S_ERROR('Error when setting new metadata, already existing metadata disagrees!') return S_OK() def _registerMetadata(self): """Set metadata for given folders. Register path and metadata before the production actually runs. This allows for the definition of the full chain in 1 go. 
""" prevent_registration = self.ops.getValue('Production/PreventMetadataRegistration', False) if self.dryrun or prevent_registration: LOG.notice('Would have created and registered the following\n', '\n '.join([' * %s: %s' % (fPath, val) for fPath, val in self.finalMetaDict.iteritems()])) LOG.notice('Would have set this as non searchable metadata', str(self.finalMetaDictNonSearch)) return S_OK() failed = defaultdict(list) for path, meta in sorted(self.finalMetaDict.items()): res = self._createDirectory(path, failed) if not res['OK']: continue LOG.verbose('Checking to set metadata:', meta) metaCopy = dict(meta) res = self._checkMetadata(path, metaCopy) if not res['OK']: return res if not metaCopy: LOG.verbose('No new metadata to set') continue LOG.verbose('Setting metadata information: ', '%s: %s' % (path, metaCopy)) result = self.fc.setMetadata(path.rstrip('/'), metaCopy) if not result['OK']: LOG.error('Could not preset metadata', str(metaCopy)) LOG.error('Could not preset metadata', result['Message']) failed[path].append(result['Message']) for path, meta in sorted(self.finalMetaDictNonSearch.items()): res = self._createDirectory(path, failed) if not res['OK']: continue LOG.verbose('Setting non searchable metadata information: ', '%s: %s' % (path, meta)) result = self.fc.setMetadata(path.rstrip('/'), meta) if not result['OK']: LOG.error('Could not preset non searchable metadata', str(meta)) LOG.error('Could not preset non searchable metadata', result['Message']) failed[path].append(result['Message']) if failed: return S_ERROR('Failed to register some metadata: %s' % dict(failed)) return S_OK() def getMetadata(self): """ Return the corresponding metadata of the last step """ metadict = {} for meta in self.finalMetaDict.values(): metadict.update(meta) if 'NumberOfEvents' in metadict: del metadict['NumberOfEvents'] #As this is not supposed to be a searchable thing return metadict def _setProdParameter(self, prodID, pname, pvalue): """ Set a production parameter. """ if isinstance( pvalue, list ): pvalue = '\n'.join(pvalue) if isinstance( pvalue, (int, long) ): pvalue = str(pvalue) if not self.dryrun: result = self.trc.setTransformationParameter(int(prodID), str(pname), str(pvalue)) if not result['OK']: LOG.error('Problem setting parameter %s for production %s and value:\n%s' % (prodID, pname, pvalue)) else: LOG.notice("Adding %s=%s to transformation" % (str(pname), str(pvalue))) result = S_OK() return result def _jobSpecificParams(self, application): """ For production additional checks are needed: ask the user """ if self.created: return S_ERROR("The production was created, you cannot add new applications to the job.") if not application.logFile: logf = application.appname + "_" + application.version + "_@{STEP_ID}.log" res = application.setLogFile(logf) if not res['OK']: return res #in fact a bit more tricky as the log files have the prodID and jobID in them ### Retrieve from the application the essential info to build the prod info. 
if not self.nbevts and not self.slicesize: self.nbevts = application.numberOfEvents if not self.nbevts: return S_ERROR("Number of events to process is not defined.") elif not application.numberOfEvents: if not self.slicesize: res = application.setNumberOfEvents(self.jobFileGroupSize * self.nbevts) else: res = application.setNumberOfEvents(self.slicesize) if not res['OK']: return res if application.numberOfEvents > 0 and (self.jobFileGroupSize * self.nbevts > application.numberOfEvents or self.slicesize > application.numberOfEvents): self.nbevts = application.numberOfEvents if not self.energy: if application.energy: self.energy = Decimal((("%1.2f" % float(application.energy)).rstrip('0').rstrip('.'))) else: return S_ERROR("Could not find the energy defined, it is needed for the production definition.") elif not application.energy: res = application.setEnergy(float(self.energy)) if not res['OK']: return res if self.energy: self._setParameter( "Energy", "float", float(self.energy), "Energy used") self.prodparameters["Energy"] = float(self.energy) if not self.evttype: if hasattr(application, 'eventType'): self.evttype = application.eventType else: return S_ERROR("Event type not found nor specified, it's mandatory for the production paths.") self.prodparameters['Process'] = self.evttype if not self.outputStorage: return S_ERROR("You need to specify the Output storage element") curpackage = "%s.%s" % (application.appname, application.version) if "SWPackages" in self.prodparameters: if not self.prodparameters["SWPackages"].count(curpackage): self.prodparameters["SWPackages"] += ";%s" % ( curpackage ) else : self.prodparameters["SWPackages"] = "%s" % (curpackage) if not application.accountInProduction: res = self._updateProdParameters(application) if not res['OK']: return res self.checked = True return S_OK() res = application.setOutputSE(self.outputStorage) if not res['OK']: return res energypath = self.getEnergyPath() if not self.basename: self.basename = self.evttype evttypepath = '' if not self.evttype[-1] == '/': evttypepath = self.evttype + '/' path = self.basepath ###Need to resolve file names and paths if self.energy: self.finalMetaDict[self.basepath + energypath] = {"Energy":str(self.energy)} if hasattr(application, "setOutputRecFile") and not application.willBeCut: evtPath = self.basepath + energypath + evttypepath self.finalMetaDict[evtPath] = {'EvtType': self.evttype} detPath = evtPath + application.detectortype self.finalMetaDict[detPath] = {'DetectorType': application.detectortype} if application.keepRecFile: path = self.basepath + energypath + evttypepath + application.detectortype + '/REC' self.finalMetaDict[path] = {'Datatype': 'REC'} fname = self.basename + '_rec.slcio' application.setOutputRecFile(fname, path) LOG.info('Will store the files under', path) self.finalpaths.append(path) path = self.basepath + energypath + evttypepath + application.detectortype + '/DST' self.finalMetaDict[path] = {'Datatype': 'DST'} fname = self.basename + '_dst.slcio' application.setOutputDstFile(fname, path) LOG.info('Will store the files under', path) self.finalpaths.append(path) elif hasattr(application, "outputFile") and hasattr(application, 'datatype') and not application.outputFile and not application.willBeCut: path = self.basepath + energypath + evttypepath self.finalMetaDict[path] = {"EvtType" : self.evttype} if hasattr(application, "detectortype"): if application.detectortype: path += application.detectortype self.finalMetaDict[path] = {"DetectorType" : application.detectortype} path += 
'/' elif self.detector: path += self.detector self.finalMetaDict[path] = {"DetectorType" : self.detector} path += '/' if not application.datatype and self.datatype: application.datatype = self.datatype path += application.datatype self.finalMetaDict[path] = {'Datatype' : application.datatype} LOG.info("Will store the files under", "%s" % path) self.finalpaths.append(path) extension = 'stdhep' if application.datatype in ['SIM', 'REC']: extension = 'slcio' fname = self.basename + "_%s" % (application.datatype.lower()) + "." + extension application.setOutputFile(fname, path) self.basepath = path res = self._updateProdParameters(application) if not res['OK']: return res self.checked = True return S_OK() def _updateProdParameters(self, application): """ Update the prod parameters stored in the production parameters visible from the web """ try: self.prodparameters.update(application.prodparameters) except Exception as x: return S_ERROR("Exception: %r" % x ) if hasattr( application, 'extraCLIArguments' ) and application.extraCLIArguments: self.prodparameters['extraCLIArguments'] = repr(application.extraCLIArguments) return S_OK() def _jobSpecificModules(self, application, step): return application._prodjobmodules(step) def getEnergyPath(self): """returns the energy path 250gev or 3tev or 1.4tev etc.""" energy = Decimal(str(self.energy)) tD = Decimal('1000.0') unit = 'gev' if energy < tD else 'tev' energy = energy if energy < tD else energy/tD energyPath = ("%1.2f" % energy).rstrip('0').rstrip('.') energyPath = energyPath+unit+'/' LOG.info("Energy path is: ", energyPath) return energyPath def _checkMetaKeys( self, metakeys, extendFileMeta=False ): """ check if metadata keys are allowed to be metadata :param list metakeys: metadata keys for production metadata :param bool extendFileMeta: also use FileMetaFields for checking meta keys :returns: S_OK, S_ERROR """ res = self.fc.getMetadataFields() if not res['OK']: LOG.error("Could not contact File Catalog") return S_ERROR("Could not contact File Catalog") metaFCkeys = res['Value']['DirectoryMetaFields'].keys() if extendFileMeta: metaFCkeys.extend( res['Value']['FileMetaFields'].keys() ) for key in metakeys: for meta in metaFCkeys: if meta != key and meta.lower() == key.lower(): return self._reportError("Key syntax error %r, should be %r" % (key, meta), name = self.__class__.__name__) if key not in metaFCkeys: return self._reportError("Key %r not found in metadata keys, allowed are %r" % (key, metaFCkeys)) return S_OK() def _checkFindDirectories( self, metadata ): """ find directories by metadata and check that there are directories found :param dict metadata: metadata dictionary :returns: S_OK, S_ERROR """ res = self.fc.findDirectoriesByMetadata(metadata) if not res['OK']: return self._reportError("Error looking up the catalog for available directories") elif len(res['Value']) < 1: return self._reportError('Could not find any directories corresponding to the query issued') return res def setReconstructionBasePaths( self, recPath, dstPath ): """ set the output Base paths for the reconstruction REC and DST files """ self._recBasePaths['REC'] = recPath self._recBasePaths['DST'] = dstPath
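# --- Illustrative sketch (not part of the class above) ---
# Hedged usage example for the ProductionJob API defined above, assuming the usual
# ILCDIRAC module path and a valid production proxy. The workflow name, group,
# storage element and task count are placeholders; the application steps (not shown)
# would be appended before createProduction() is called.
from ILCDIRAC.Interfaces.API.NewInterface.ProductionJob import ProductionJob

prod = ProductionJob()
prod.setDryRun( True )                      # only report what would be submitted
prod.setProdType( 'MCGeneration' )
prod.setWorkflowName( 'example_gen_prod' )  # placeholder name
prod.setProdGroup( 'example_group' )        # placeholder production group
prod.setOutputSE( 'CERN-DST-EOS' )          # placeholder storage element
prod.setJobFileGroupSize( 1 )
# ... generator application(s) would be appended to the job here ...
prod.addFinalization( uploadData = True, registerData = True, uploadLog = True, sendFailover = True )
res = prod.createProduction( name = 'example_gen_prod' )
if res['OK']:
  prod.setNbOfTasks( 100 )                  # placeholder number of generation tasks
  prod.finalizeProd()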
class TransformationAgent(AgentModule): def initialize(self): self.pluginLocation = self.am_getOption( 'PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin') self.checkCatalog = self.am_getOption('CheckCatalog', 'yes') # This sets the Default Proxy to used as that defined under # /Operations/Shifter/ProductionManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption('shifterProxy', 'ProductionManager') self.transDB = TransformationClient('TransformationDB') self.rm = ReplicaManager() return S_OK() def execute(self): # Get the transformations to process res = self.getTransformations() if not res['OK']: gLogger.info("%s.execute: Failed to obtain transformations: %s" % (AGENT_NAME, res['Message'])) return S_OK() # Process the transformations for transDict in res['Value']: transID = long(transDict['TransformationID']) gLogger.info("%s.execute: Processing transformation %s." % (AGENT_NAME, transID)) startTime = time.time() res = self.processTransformation(transDict) if not res['OK']: gLogger.info( "%s.execute: Failed to process transformation: %s" % (AGENT_NAME, res['Message'])) else: gLogger.info( "%s.execute: Processed transformation in %.1f seconds" % (AGENT_NAME, time.time() - startTime)) return S_OK() def getTransformations(self): # Obtain the transformations to be executed transName = self.am_getOption('Transformation', 'All') if transName == 'All': gLogger.info( "%s.getTransformations: Initializing general purpose agent." % AGENT_NAME) res = self.transDB.getTransformations( {'Status': ['Active', 'Completing', 'Flush']}, extraParams=True) if not res['OK']: gLogger.error( "%s.getTransformations: Failed to get transformations." % AGENT_NAME, res['Message']) return res transformations = res['Value'] gLogger.info( "%s.getTransformations: Obtained %d transformations to process" % (AGENT_NAME, len(transformations))) else: gLogger.info( "%s.getTransformations: Initializing for transformation %s." % (AGENT_NAME, transName)) res = self.transDB.getTransformation(transName, extraParams=True) if not res['OK']: gLogger.error( "%s.getTransformations: Failed to get transformation." % AGENT_NAME, res['Message']) return res transformations = [res['Value']] return S_OK(transformations) def processTransformation(self, transDict): transID = transDict['TransformationID'] # First get the LFNs associated to the transformation res = self.transDB.getTransformationFiles(condDict={ 'TransformationID': transID, 'Status': 'Unused' }) if not res['OK']: gLogger.error( "%s.processTransformation: Failed to obtain input data." % AGENT_NAME, res['Message']) return res transFiles = res['Value'] lfns = res['LFNs'] if not lfns: gLogger.info( "%s.processTransformation: No 'Unused' files found for transformation." % AGENT_NAME) if transDict['Status'] == 'Flush': res = self.transDB.setTransformationParameter( transID, 'Status', 'Active') if not res['OK']: gLogger.error( "%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME, res['Message']) else: gLogger.info( "%s.execute: Updated transformation status to 'Active'." 
% AGENT_NAME) return S_OK() # Check the data is available with replicas res = self.__getDataReplicas(transID, lfns, active=(transDict['Type'].lower() not in ["replication", "removal"])) if not res['OK']: gLogger.error( "%s.processTransformation: Failed to get data replicas" % AGENT_NAME, res['Message']) return res dataReplicas = res['Value'] # Get the plug-in type and create the plug-in object plugin = 'Standard' if transDict.has_key('Plugin') and transDict['Plugin']: plugin = transDict['Plugin'] gLogger.info( "%s.processTransformation: Processing transformation with '%s' plug-in." % (AGENT_NAME, plugin)) res = self.__generatePluginObject(plugin) if not res['OK']: return res oPlugin = res['Value'] # Get the plug-in and set the required params oPlugin.setParameters(transDict) oPlugin.setInputData(dataReplicas) oPlugin.setTransformationFiles(transFiles) res = oPlugin.generateTasks() if not res['OK']: gLogger.error( "%s.processTransformation: Failed to generate tasks for transformation." % AGENT_NAME, res['Message']) return res tasks = res['Value'] # Create the tasks allCreated = True created = 0 for se, lfns in tasks: res = self.transDB.addTaskForTransformation(transID, lfns, se) if not res['OK']: gLogger.error( "%s.processTransformation: Failed to add task generated by plug-in." % AGENT_NAME, res['Message']) allCreated = False else: created += 1 if created: gLogger.info( "%s.processTransformation: Successfully created %d tasks for transformation." % (AGENT_NAME, created)) # If this production is to Flush if transDict['Status'] == 'Flush' and allCreated: res = self.transDB.setTransformationParameter( transID, 'Status', 'Active') if not res['OK']: gLogger.error( "%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME, res['Message']) else: gLogger.info( "%s.execute: Updated transformation status to 'Active'." % AGENT_NAME) return S_OK() ###################################################################### # # Internal methods used by the agent # def __generatePluginObject(self, plugin): """ This simply instantiates the TransformationPlugin class with the relevant plugin name """ try: plugModule = __import__(self.pluginLocation, globals(), locals(), ['TransformationPlugin']) except Exception, x: gLogger.exception( "%s.__generatePluginObject: Failed to import 'TransformationPlugin'" % AGENT_NAME, '', x) return S_ERROR() try: evalString = "plugModule.TransformationPlugin('%s')" % plugin return S_OK(eval(evalString)) except Exception, x: gLogger.exception( "%s.__generatePluginObject: Failed to create %s()." % (AGENT_NAME, plugin), '', x) return S_ERROR()
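# --- Illustrative sketch (not part of the agent above) ---
# __generatePluginObject above instantiates the plugin through eval(); a hedged,
# eval-free alternative with the same behaviour can use importlib and getattr.
# `pluginLocation` and `plugin` are the same values the agent already works with.
import importlib

def generatePluginObject( pluginLocation, plugin ):
  """Return TransformationPlugin(plugin) from the configured module, without eval()."""
  plugModule = importlib.import_module( pluginLocation )
  pluginClass = getattr( plugModule, 'TransformationPlugin' )
  return pluginClass( plugin )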
class Transformation(API): ############################################################################# def __init__(self, transID=0, transClient=None): """ c'tor """ super(Transformation, self).__init__() self.paramTypes = { "TransformationID": [types.IntType, types.LongType], "TransformationName": types.StringTypes, "Status": types.StringTypes, "Description": types.StringTypes, "LongDescription": types.StringTypes, "Type": types.StringTypes, "Plugin": types.StringTypes, "AgentType": types.StringTypes, "FileMask": types.StringTypes, "TransformationGroup": types.StringTypes, "GroupSize": [types.IntType, types.LongType, types.FloatType], "InheritedFrom": [types.IntType, types.LongType], "Body": types.StringTypes, "MaxNumberOfTasks": [types.IntType, types.LongType], "EventsPerTask": [types.IntType, types.LongType], } self.paramValues = { "TransformationID": 0, "TransformationName": "", "Status": "New", "Description": "", "LongDescription": "", "Type": "", "Plugin": "Standard", "AgentType": "Manual", "FileMask": "", "TransformationGroup": "General", "GroupSize": 1, "InheritedFrom": 0, "Body": "", "MaxNumberOfTasks": 0, "EventsPerTask": 0, } self.ops = Operations() self.supportedPlugins = self.ops.getValue( "Transformations/AllowedPlugins", ["Broadcast", "Standard", "BySize", "ByShare"] ) if not transClient: self.transClient = TransformationClient() else: self.transClient = transClient self.serverURL = self.transClient.getServer() self.exists = False if transID: self.paramValues["TransformationID"] = transID res = self.getTransformation() if res["OK"]: self.exists = True elif res["Message"] == "Transformation does not exist": raise AttributeError, "TransformationID %d does not exist" % transID else: self.paramValues["TransformationID"] = 0 gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % (transID, self.transClient.serverURL) ) def setServer(self, server): self.serverURL = server self.transClient.setServer(self.serverURL) def getServer(self): return self.serverURL def reset(self, transID=0): self.__init__(transID) self.transClient.setServer(self.serverURL) return S_OK() def setTargetSE(self, seList): return self.__setSE("TargetSE", seList) def setSourceSE(self, seList): return self.__setSE("SourceSE", seList) def __setSE(self, se, seList): if type(seList) in types.StringTypes: try: seList = eval(seList) except: seList = seList.replace(",", " ").split() res = self.__checkSEs(seList) if not res["OK"]: return res self.item_called = se return self.__setParam(seList) def __getattr__(self, name): if name.find("get") == 0: item = name[3:] self.item_called = item return self.__getParam if name.find("set") == 0: item = name[3:] self.item_called = item return self.__setParam raise AttributeError, name def __getParam(self): if self.item_called == "Available": return S_OK(self.paramTypes.keys()) if self.item_called == "Parameters": return S_OK(self.paramValues) if self.item_called in self.paramValues: return S_OK(self.paramValues[self.item_called]) raise AttributeError, "Unknown parameter for transformation: %s" % self.item_called def __setParam(self, value): change = False if self.item_called in self.paramTypes: oldValue = self.paramValues[self.item_called] if oldValue != value: if type(value) in self.paramTypes[self.item_called]: change = True else: raise TypeError, "%s %s %s expected one of %s" % ( self.item_called, value, type(value), self.paramTypes[self.item_called], ) if not self.item_called in self.paramTypes.keys(): if not self.paramValues.has_key(self.item_called): change 
= True else: oldValue = self.paramValues[self.item_called] if oldValue != value: change = True if not change: gLogger.verbose("No change of parameter %s required" % self.item_called) else: gLogger.verbose("Parameter %s to be changed" % self.item_called) transID = self.paramValues["TransformationID"] if self.exists and transID: res = self.transClient.setTransformationParameter(transID, self.item_called, value) if not res["OK"]: return res self.paramValues[self.item_called] = value return S_OK() def getTransformation(self, printOutput=False): transID = self.paramValues["TransformationID"] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() res = self.transClient.getTransformation(transID, extraParams=True) if not res["OK"]: if printOutput: self._prettyPrint(res) return res transParams = res["Value"] for paramName, paramValue in transParams.items(): setter = None setterName = "set%s" % paramName if hasattr(self, setterName) and callable(getattr(self, setterName)): setter = getattr(self, setterName) if not setterName: gLogger.error("Unable to invoke setter %s, it isn't a member function" % setterName) continue setter(paramValue) if printOutput: gLogger.info("No printing available yet") return S_OK(transParams) def getTransformationLogging(self, printOutput=False): transID = self.paramValues["TransformationID"] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() res = self.transClient.getTransformationLogging(transID) if not res["OK"]: if printOutput: self._prettyPrint(res) return res loggingList = res["Value"] if printOutput: self._printFormattedDictList( loggingList, ["Message", "MessageDate", "AuthorDN"], "MessageDate", "MessageDate" ) return S_OK(loggingList) def extendTransformation(self, nTasks, printOutput=False): return self.__executeOperation("extendTransformation", nTasks, printOutput=printOutput) def cleanTransformation(self, printOutput=False): res = self.__executeOperation("cleanTransformation", printOutput=printOutput) if res["OK"]: self.paramValues["Status"] = "Cleaned" return res def deleteTransformation(self, printOutput=False): res = self.__executeOperation("deleteTransformation", printOutput=printOutput) if res["OK"]: self.reset() return res def addFilesToTransformation(self, lfns, printOutput=False): return self.__executeOperation("addFilesToTransformation", lfns, printOutput=printOutput) def setFileStatusForTransformation(self, status, lfns, printOutput=False): return self.__executeOperation("setFileStatusForTransformation", status, lfns, printOutput=printOutput) def getTransformationTaskStats(self, printOutput=False): return self.__executeOperation("getTransformationTaskStats", printOutput=printOutput) def getTransformationStats(self, printOutput=False): return self.__executeOperation("getTransformationStats", printOutput=printOutput) def deleteTasks(self, taskMin, taskMax, printOutput=False): return self.__executeOperation("deleteTasks", taskMin, taskMax, printOutput=printOutput) def addTaskForTransformation(self, lfns=[], se="Unknown", printOutput=False): return self.__executeOperation("addTaskForTransformation", lfns, se, printOutput=printOutput) def setTaskStatus(self, taskID, status, printOutput=False): return self.__executeOperation("setTaskStatus", taskID, status, printOutput=printOutput) def __executeOperation(self, operation, *parms, **kwds): transID = self.paramValues["TransformationID"] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() printOutput = kwds.pop("printOutput") fcn = None if 
hasattr(self.transClient, operation) and callable(getattr(self.transClient, operation)): fcn = getattr(self.transClient, operation) if not fcn: return S_ERROR("Unable to invoke %s, it isn't a member funtion of TransformationClient") res = fcn(transID, *parms, **kwds) if printOutput: self._prettyPrint(res) return res def getTransformationFiles( self, fileStatus=[], lfns=[], outputFields=[ "FileID", "LFN", "Status", "TaskID", "TargetSE", "UsedSE", "ErrorCount", "InsertedTime", "LastUpdate", ], orderBy="FileID", printOutput=False, ): condDict = {"TransformationID": self.paramValues["TransformationID"]} if fileStatus: condDict["Status"] = fileStatus if lfns: condDict["LFN"] = lfns res = self.transClient.getTransformationFiles(condDict=condDict) if not res["OK"]: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res["ParameterNames"].join(" ")) elif not res["Value"]: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res["Value"], outputFields, "FileID", orderBy) return res def getTransformationTasks( self, taskStatus=[], taskIDs=[], outputFields=[ "TransformationID", "TaskID", "ExternalStatus", "ExternalID", "TargetSE", "CreationTime", "LastUpdateTime", ], orderBy="TaskID", printOutput=False, ): condDict = {"TransformationID": self.paramValues["TransformationID"]} if taskStatus: condDict["ExternalStatus"] = taskStatus if taskIDs: condDict["TaskID"] = taskIDs res = self.transClient.getTransformationTasks(condDict=condDict) if not res["OK"]: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res["ParameterNames"].join(" ")) elif not res["Value"]: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res["Value"], outputFields, "TaskID", orderBy) return res ############################################################################# def getTransformations( self, transID=[], transStatus=[], outputFields=["TransformationID", "Status", "AgentType", "TransformationName", "CreationDate"], orderBy="TransformationID", printOutput=False, ): condDict = {} if transID: condDict["TransformationID"] = transID if transStatus: condDict["Status"] = transStatus res = self.transClient.getTransformations(condDict=condDict) if not res["OK"]: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res["ParameterNames"].join(" ")) elif not res["Value"]: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res["Value"], outputFields, "TransformationID", orderBy) return res ############################################################################# def addTransformation(self, addFiles=True, printOutput=False): res = self._checkCreation() if not res["OK"]: return self._errorReport(res, "Failed transformation sanity check") if printOutput: gLogger.info("Will attempt to create transformation with the following parameters") self._prettyPrint(self.paramValues) res = self.transClient.addTransformation( self.paramValues["TransformationName"], self.paramValues["Description"], self.paramValues["LongDescription"], self.paramValues["Type"], self.paramValues["Plugin"], self.paramValues["AgentType"], self.paramValues["FileMask"], transformationGroup=self.paramValues["TransformationGroup"], groupSize=self.paramValues["GroupSize"], inheritedFrom=self.paramValues["InheritedFrom"], body=self.paramValues["Body"], 
maxTasks=self.paramValues["MaxNumberOfTasks"], eventsPerTask=self.paramValues["EventsPerTask"], addFiles=addFiles, ) if not res["OK"]: if printOutput: self._prettyPrint(res) return res transID = res["Value"] self.exists = True self.setTransformationID(transID) gLogger.notice("Created transformation %d" % transID) for paramName, paramValue in self.paramValues.items(): if not self.paramTypes.has_key(paramName): res = self.transClient.setTransformationParameter(transID, paramName, paramValue) if not res["OK"]: gLogger.error("Failed to add parameter", "%s %s" % (paramName, res["Message"])) gLogger.notice("To add this parameter later please execute the following.") gLogger.notice("oTransformation = Transformation(%d)" % transID) gLogger.notice("oTransformation.set%s(...)" % paramName) return S_OK(transID) def _checkCreation(self): """ Few checks """ if self.paramValues["TransformationID"]: gLogger.info("You are currently working with an active transformation definition.") gLogger.info("If you wish to create a new transformation reset the TransformationID.") gLogger.info("oTransformation.reset()") return S_ERROR() requiredParameters = ["TransformationName", "Description", "LongDescription", "Type"] for parameter in requiredParameters: if not self.paramValues[parameter]: gLogger.info("%s is not defined for this transformation. This is required..." % parameter) self.paramValues[parameter] = raw_input("Please enter the value of " + parameter + " ") plugin = self.paramValues["Plugin"] if plugin: if not plugin in self.supportedPlugins: gLogger.info("The selected Plugin (%s) is not known to the transformation agent." % plugin) res = self.__promptForParameter("Plugin", choices=self.supportedPlugins, default="Standard") if not res["OK"]: return res self.paramValues["Plugin"] = res["Value"] plugin = self.paramValues["Plugin"] return S_OK() def _checkBySizePlugin(self): return self._checkStandardPlugin() def _checkBySharePlugin(self): return self._checkStandardPlugin() def _checkStandardPlugin(self): groupSize = self.paramValues["GroupSize"] if groupSize <= 0: gLogger.info("The GroupSize was found to be less than zero. It has been set to 1.") res = self.setGroupSize(1) if not res["OK"]: return res return S_OK() def _checkBroadcastPlugin(self): gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % (", ".join(["SourceSE", "TargetSE"])) ) requiredParams = ["SourceSE", "TargetSE"] for requiredParam in requiredParams: if (not self.paramValues.has_key(requiredParam)) or (not self.paramValues[requiredParam]): paramValue = raw_input("Please enter " + requiredParam + " ") setter = None setterName = "set%s" % requiredParam if hasattr(self, setterName) and callable(getattr(self, setterName)): setter = getattr(self, setterName) if not setter: return S_ERROR("Unable to invoke %s, this function hasn't been implemented." 
% setterName) ses = paramValue.replace(",", " ").split() res = setter(ses) if not res["OK"]: return res return S_OK() def __checkSEs(self, seList): res = gConfig.getSections("/Resources/StorageElements") if not res["OK"]: return self._errorReport(res, "Failed to get possible StorageElements") missing = [] for se in seList: if not se in res["Value"]: gLogger.error("StorageElement %s is not known" % se) missing.append(se) if missing: return S_ERROR("%d StorageElements not known" % len(missing)) return S_OK() def __promptForParameter(self, parameter, choices=[], default="", insert=True): res = promptUser("Please enter %s" % parameter, choices=choices, default=default) if not res["OK"]: return self._errorReport(res) gLogger.notice("%s will be set to '%s'" % (parameter, res["Value"])) paramValue = res["Value"] if insert: setter = None setterName = "set%s" % parameter if hasattr(self, setterName) and callable(getattr(self, setterName)): setter = getattr(self, setterName) if not setter: return S_ERROR("Unable to invoke %s, it isn't a member function of Transformation!") res = setter(paramValue) if not res["OK"]: return res return S_OK(paramValue)
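# --- Illustrative sketch (not part of the class above) ---
# Hedged usage example for the Transformation client API defined above. The
# transformation name and storage elements are placeholders; the SEs must be known
# StorageElements in the DIRAC configuration for the internal SE check to accept them.
from DIRAC.TransformationSystem.Client.Transformation import Transformation

trans = Transformation()
trans.setTransformationName( 'Example_Replication' )   # placeholder, must be unique
trans.setDescription( 'Example replication transformation' )
trans.setLongDescription( 'Replicates the selected files to the target SE' )
trans.setType( 'Replication' )
trans.setPlugin( 'Broadcast' )
trans.setSourceSE( [ 'SOURCE-SRM' ] )                  # placeholder source SE
trans.setTargetSE( [ 'TARGET-SRM' ] )                  # placeholder target SE
res = trans.addTransformation()
if res['OK']:
  trans.setAgentType( 'Automatic' )
  trans.setStatus( 'Active' )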
class Transformation( API ): ############################################################################# def __init__( self, transID = 0, transClient = None ): """ c'tor """ super( Transformation, self ).__init__() self.paramTypes = { 'TransformationID' : [types.IntType, types.LongType], 'TransformationName' : types.StringTypes, 'Status' : types.StringTypes, 'Description' : types.StringTypes, 'LongDescription' : types.StringTypes, 'Type' : types.StringTypes, 'Plugin' : types.StringTypes, 'AgentType' : types.StringTypes, 'FileMask' : types.StringTypes, 'TransformationGroup' : types.StringTypes, 'GroupSize' : [types.IntType, types.LongType, types.FloatType], 'InheritedFrom' : [types.IntType, types.LongType], 'Body' : types.StringTypes, 'MaxNumberOfTasks' : [types.IntType, types.LongType], 'EventsPerTask' : [types.IntType, types.LongType]} self.paramValues = { 'TransformationID' : 0, 'TransformationName' : '', 'Status' : 'New', 'Description' : '', 'LongDescription' : '', 'Type' : '', 'Plugin' : 'Standard', 'AgentType' : 'Manual', 'FileMask' : '', 'TransformationGroup' : 'General', 'GroupSize' : 1, 'InheritedFrom' : 0, 'Body' : '', 'MaxNumberOfTasks' : 0, 'EventsPerTask' : 0} self.ops = Operations() self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins', ['Broadcast', 'Standard', 'BySize', 'ByShare'] ) if not transClient: self.transClient = TransformationClient() else: self.transClient = transClient self.serverURL = self.transClient.getServer() self.exists = False if transID: self.paramValues['TransformationID'] = transID res = self.getTransformation() if res['OK']: self.exists = True elif res['Message'] == 'Transformation does not exist': raise AttributeError( 'TransformationID %d does not exist' % transID ) else: self.paramValues['TransformationID'] = 0 gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID, self.transClient.serverURL ) ) def setServer( self, server ): self.serverURL = server self.transClient.setServer( self.serverURL ) def getServer( self ): return self.serverURL def reset( self, transID = 0 ): self.__init__( transID ) self.transClient.setServer( self.serverURL ) return S_OK() def setTargetSE( self, seList ): return self.__setSE( 'TargetSE', seList ) def setSourceSE( self, seList ): return self.__setSE( 'SourceSE', seList ) def setBody( self, body ): """ check that the body is a string, or using the proper syntax for multiple operations :param body: transformation body, for example .. 
code :: python body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }), ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ), ] :type body: string or list of tuples (or lists) of string and dictionaries :raises TypeError: If the structure is not as expected :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation` is used :returns: S_OK, S_ERROR """ self.item_called = "Body" if isinstance( body, basestring ): return self.__setParam( body ) if not isinstance( body, ( list, tuple ) ): raise TypeError( "Expected list or string, but %r is %s" % ( body, type( body ) ) ) for tup in body: if not isinstance( tup, ( tuple, list ) ): raise TypeError( "Expected tuple or list, but %r is %s" % ( tup, type( tup ) ) ) if len( tup ) != 2: raise TypeError( "Expected 2-tuple, but %r is length %d" % ( tup, len( tup ) ) ) if not isinstance( tup[0], basestring ): raise TypeError( "Expected string, but first entry in tuple %r is %s" % ( tup, type( tup[0] ) ) ) if not isinstance( tup[1], dict ): raise TypeError( "Expected dictionary, but second entry in tuple %r is %s" % ( tup, type( tup[0] ) ) ) for par, val in tup[1].iteritems(): if not isinstance( par, basestring ): raise TypeError( "Expected string, but key in dictionary %r is %s" % ( par, type( par ) ) ) if not par in Operation.ATTRIBUTE_NAMES: raise ValueError( "Unknown attribute for Operation: %s" % par ) if not isinstance( val, ( basestring, int, long, float, list, tuple, dict ) ): raise TypeError( "Cannot encode %r, in json" % ( val ) ) return self.__setParam( json.dumps( body ) ) def __setSE( self, seParam, seList ): if isinstance( seList, basestring ): try: seList = eval( seList ) except: seList = seList.split( ',' ) elif isinstance( seList, ( list, dict, tuple ) ): seList = list( seList ) else: return S_ERROR( "Bad argument type" ) res = self.__checkSEs( seList ) if not res['OK']: return res self.item_called = seParam return self.__setParam( seList ) def __getattr__( self, name ): if name.find( 'get' ) == 0: item = name[3:] self.item_called = item return self.__getParam if name.find( 'set' ) == 0: item = name[3:] self.item_called = item return self.__setParam raise AttributeError( name ) def __getParam( self ): if self.item_called == 'Available': return S_OK( self.paramTypes.keys() ) if self.item_called == 'Parameters': return S_OK( self.paramValues ) if self.item_called in self.paramValues: return S_OK( self.paramValues[self.item_called] ) raise AttributeError( "Unknown parameter for transformation: %s" % self.item_called ) def __setParam( self, value ): change = False if self.item_called in self.paramTypes: if self.paramValues[self.item_called] != value: if type( value ) in self.paramTypes[self.item_called]: change = True else: raise TypeError( "%s %s %s expected one of %s" % ( self.item_called, value, type( value ), self.paramTypes[self.item_called] ) ) else: if self.item_called not in self.paramValues: change = True else: if self.paramValues[self.item_called] != value: change = True if not change: gLogger.verbose( "No change of parameter %s required" % self.item_called ) else: gLogger.verbose( "Parameter %s to be changed" % self.item_called ) transID = self.paramValues['TransformationID'] if self.exists and transID: res = self.transClient.setTransformationParameter( transID, self.item_called, value ) if not res['OK']: return res self.paramValues[self.item_called] = value return S_OK() def getTransformation( self, printOutput = False ): transID = 
self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() res = self.transClient.getTransformation( transID, extraParams = True ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res transParams = res['Value'] for paramName, paramValue in transParams.items(): setter = None setterName = "set%s" % paramName if hasattr( self, setterName ) and callable( getattr( self, setterName ) ): setter = getattr( self, setterName ) if not setterName: gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName ) continue setter( paramValue ) if printOutput: gLogger.info( "No printing available yet" ) return S_OK( transParams ) def getTransformationLogging( self, printOutput = False ): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() res = self.transClient.getTransformationLogging( transID ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res loggingList = res['Value'] if printOutput: self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' ) return S_OK( loggingList ) def extendTransformation( self, nTasks, printOutput = False ): return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput ) def cleanTransformation( self, printOutput = False ): res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput ) if res['OK']: self.paramValues['Status'] = 'Cleaned' return res def deleteTransformation( self, printOutput = False ): res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput ) if res['OK']: self.reset() return res def addFilesToTransformation( self, lfns, printOutput = False ): return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput ) def setFileStatusForTransformation( self, status, lfns, printOutput = False ): return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput ) def getTransformationTaskStats( self, printOutput = False ): return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput ) def getTransformationStats( self, printOutput = False ): return self.__executeOperation( 'getTransformationStats', printOutput = printOutput ) def deleteTasks( self, taskMin, taskMax, printOutput = False ): return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput ) def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ): return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput ) def setTaskStatus( self, taskID, status, printOutput = False ): return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput ) def __executeOperation( self, operation, *parms, **kwds ): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal( "No TransformationID known" ) return S_ERROR() printOutput = kwds.pop( 'printOutput' ) fcn = None if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ): fcn = getattr( self.transClient, operation ) if not fcn: return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" ) res = fcn( transID, *parms, **kwds ) if printOutput: self._prettyPrint( res ) return res def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 
'Status', 'TaskID', 'TargetSE', 'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate'], orderBy = 'FileID', printOutput = False ): condDict = {'TransformationID':self.paramValues['TransformationID']} if fileStatus: condDict['Status'] = fileStatus if lfns: condDict['LFN'] = lfns res = self.transClient.getTransformationFiles( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % " ".join( res['ParameterNames'] ) ) elif not res['Value']: gLogger.info( "No files found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy ) return res def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID', 'TargetSE', 'CreationTime', 'LastUpdateTime'], orderBy = 'TaskID', printOutput = False ): condDict = {'TransformationID':self.paramValues['TransformationID']} if taskStatus: condDict['ExternalStatus'] = taskStatus if taskIDs: condDict['TaskID'] = taskIDs res = self.transClient.getTransformationTasks( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % " ".join( res['ParameterNames'] ) ) elif not res['Value']: gLogger.info( "No tasks found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy ) return res ############################################################################# def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate'], orderBy = 'TransformationID', printOutput = False ): condDict = {} if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % " ".join( res['ParameterNames'] ) ) elif not res['Value']: gLogger.info( "No transformations found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy ) return res ############################################################################# def getAuthorDNfromProxy( self ): """ gets the AuthorDN and username from the uploaded proxy """ username = "" author = "" res = getProxyInfo() if res['OK']: author = res['Value']['identity'] username = res['Value']['username'] else: gLogger.error( "Unable to get uploaded proxy info: %s" % res['Message'] ) return S_ERROR( res['Message'] ) res = {'username' : username, 'authorDN' : author } return S_OK( res ) ############################################################################# def getTransformationsByUser( self, authorDN = "", userName = "", transID = [], transStatus = [], outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate', 'AuthorDN'], orderBy = 'TransformationID', printOutput = False ): condDict = {} if authorDN == "": res = self.getAuthorDNfromProxy() if not res['OK']: gLogger.error( res['Message'] ) return S_ERROR( res['Message'] ) else: foundUserName = res['Value']['username'] foundAuthor = res['Value']['authorDN'] # If the username who created the uploaded proxy differs from the provided username, report an error and exit if not ( 
userName == "" or userName == foundUserName ): gLogger.error( "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % ( userName, foundUserName ) ) return S_ERROR( "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % ( userName, foundUserName ) ) userName = foundUserName authorDN = foundAuthor gLogger.info( "Will list transformations created by user '%s' with status '%s'" % ( userName, ', '.join( transStatus ) ) ) else: gLogger.info( "Will list transformations created by '%s' with status '%s'" % ( authorDN, ', '.join( transStatus ) ) ) condDict['AuthorDN'] = authorDN if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations( condDict = condDict ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res if printOutput: if not outputFields: gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) ) elif not res['Value']: gLogger.info( "No tasks found for selection" ) else: self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy ) return res ############################################################################# def getSummaryTransformations( self , transID = [] ): """Show the summary for a list of Transformations Fields starting with 'F' ('J') refers to files (jobs). Proc. stand for processed. """ condDict = { 'TransformationID' : transID } orderby = [] start = 0 maxitems = len( transID ) paramShowNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed', \ 'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting', \ 'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled'] # Below, the header used for each field in the printing: short to fit in one line paramShowNamesShort = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.', \ 'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled'] dictList = [] result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems ) if not result['OK']: self._prettyPrint( result ) return result if result['Value']['TotalRecords'] > 0: try: paramNames = result['Value']['ParameterNames'] for paramValues in result['Value']['Records']: paramShowValues = map( lambda pname: paramValues[ paramNames.index( pname ) ], paramShowNames ) showDict = dict( zip( paramShowNamesShort, paramShowValues ) ) dictList.append( showDict ) except Exception as x: print 'Exception %s ' % str( x ) if not len( dictList ) > 0: gLogger.error( 'No found transformations satisfying input condition' ) return S_ERROR( 'No found transformations satisfying input condition' ) else: print self._printFormattedDictList( dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0] ) return S_OK( dictList ) ############################################################################# def addTransformation( self, addFiles = True, printOutput = False ): res = self._checkCreation() if not res['OK']: return self._errorReport( res, 'Failed transformation sanity check' ) if printOutput: gLogger.info( "Will attempt to create transformation with the following parameters" ) self._prettyPrint( self.paramValues ) res = self.transClient.addTransformation( self.paramValues['TransformationName'], self.paramValues['Description'], self.paramValues['LongDescription'], self.paramValues['Type'], self.paramValues['Plugin'], self.paramValues['AgentType'], 
self.paramValues['FileMask'], transformationGroup = self.paramValues['TransformationGroup'], groupSize = self.paramValues['GroupSize'], inheritedFrom = self.paramValues['InheritedFrom'], body = self.paramValues['Body'], maxTasks = self.paramValues['MaxNumberOfTasks'], eventsPerTask = self.paramValues['EventsPerTask'], addFiles = addFiles ) if not res['OK']: if printOutput: self._prettyPrint( res ) return res transID = res['Value'] self.exists = True self.setTransformationID( transID ) gLogger.notice( "Created transformation %d" % transID ) for paramName, paramValue in self.paramValues.items(): if paramName not in self.paramTypes: res = self.transClient.setTransformationParameter( transID, paramName, paramValue ) if not res['OK']: gLogger.error( "Failed to add parameter", "%s %s" % ( paramName, res['Message'] ) ) gLogger.notice( "To add this parameter later please execute the following." ) gLogger.notice( "oTransformation = Transformation(%d)" % transID ) gLogger.notice( "oTransformation.set%s(...)" % paramName ) return S_OK( transID ) def _checkCreation( self ): """ Few checks """ if self.paramValues['TransformationID']: gLogger.info( "You are currently working with an active transformation definition." ) gLogger.info( "If you wish to create a new transformation reset the TransformationID." ) gLogger.info( "oTransformation.reset()" ) return S_ERROR() requiredParameters = ['TransformationName', 'Description' , 'LongDescription', 'Type'] for parameter in requiredParameters: if not self.paramValues[parameter]: gLogger.info( "%s is not defined for this transformation. This is required..." % parameter ) self.paramValues[parameter] = raw_input( "Please enter the value of " + parameter + " " ) plugin = self.paramValues['Plugin'] if plugin: if not plugin in self.supportedPlugins: gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin ) res = self.__promptForParameter( 'Plugin', choices = self.supportedPlugins, default = 'Standard' ) if not res['OK']: return res self.paramValues['Plugin'] = res['Value'] plugin = self.paramValues['Plugin'] return S_OK() def _checkBySizePlugin( self ): return self._checkStandardPlugin() def _checkBySharePlugin( self ): return self._checkStandardPlugin() def _checkStandardPlugin( self ): groupSize = self.paramValues['GroupSize'] if groupSize <= 0: gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." ) res = self.setGroupSize( 1 ) if not res['OK']: return res return S_OK() def _checkBroadcastPlugin( self ): gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % ( ', '.join( ['SourceSE', 'TargetSE'] ) ) ) requiredParams = ['SourceSE', 'TargetSE'] for requiredParam in requiredParams: if not self.paramValues.get( requiredParam ): paramValue = raw_input( "Please enter " + requiredParam + " " ) setter = None setterName = "set%s" % requiredParam if hasattr( self, setterName ) and callable( getattr( self, setterName ) ): setter = getattr( self, setterName ) if not setter: return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." 
% setterName ) ses = paramValue.replace( ',', ' ' ).split() res = setter( ses ) if not res['OK']: return res return S_OK() def __checkSEs( self, seList ): res = gConfig.getSections( '/Resources/StorageElements' ) if not res['OK']: return self._errorReport( res, 'Failed to get possible StorageElements' ) missing = set( seList ) - set( res['Value'] ) if missing: for se in missing: gLogger.error( "StorageElement %s is not known" % se ) return S_ERROR( "%d StorageElements not known" % len( missing ) ) return S_OK() def __promptForParameter( self, parameter, choices = [], default = '', insert = True ): res = promptUser( "Please enter %s" % parameter, choices = choices, default = default ) if not res['OK']: return self._errorReport( res ) gLogger.notice( "%s will be set to '%s'" % ( parameter, res['Value'] ) ) paramValue = res['Value'] if insert: setter = None setterName = "set%s" % parameter if hasattr( self, setterName ) and callable( getattr( self, setterName ) ): setter = getattr( self, setterName ) if not setter: return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" % setterName ) res = setter( paramValue ) if not res['OK']: return res return S_OK( paramValue )
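For orientation, here is a minimal usage sketch of the setBody validation shown above, assuming the standard DIRAC module path for the Transformation API; the storage element names are the same illustrative ones used in the docstring, not real SEs. setBody accepts either a plain string or a list of (operation name, attribute dictionary) pairs and, after validation, stores the latter JSON-encoded in the 'Body' parameter.

# Hedged sketch: the import path and SE names are assumptions for illustration only.
from DIRAC import gLogger
from DIRAC.TransformationSystem.Client.Transformation import Transformation

# A multi-operation body: each entry is (Operation type, Operation attribute dict).
replicationBody = [
    ("ReplicateAndRegister", {"SourceSE": "FOO-SRM", "TargetSE": "BAR-SRM"}),
    ("RemoveReplica", {"TargetSE": "FOO-SRM"}),
]

trans = Transformation()
res = trans.setBody(replicationBody)  # validated, then stored as json.dumps(replicationBody)
if not res['OK']:
    gLogger.error(res['Message'])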
class TransformationCleaningAgent(AgentModule): """ .. class:: TransformationCleaningAgent :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance :param ~TransformationClient.TransformationClient transClient: TransformationClient instance :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance """ def __init__(self, *args, **kwargs): """c'tor""" AgentModule.__init__(self, *args, **kwargs) self.shifterProxy = None # # transformation client self.transClient = None # # wms client self.wmsClient = None # # request client self.reqClient = None # # file catalog client self.metadataClient = None # # transformations types self.transformationTypes = None # # directory locations self.directoryLocations = ["TransformationDB", "MetadataCatalog"] # # transformation metadata self.transfidmeta = "TransformationID" # # archive periof in days self.archiveAfter = 7 # # transformation log SEs self.logSE = "LogSE" # # enable/disable execution self.enableFlag = "True" self.dataProcTTypes = ["MCSimulation", "Merge"] self.dataManipTTypes = ["Replication", "Removal"] def initialize(self): """agent initialisation reading and setting config opts :param self: self reference """ # # shifter proxy # See cleanContent method: this proxy will be used ALSO when the file catalog used # is the DIRAC File Catalog (DFC). # This is possible because of unset of the "UseServerCertificate" option self.shifterProxy = self.am_getOption("shifterProxy", self.shifterProxy) # # transformations types self.dataProcTTypes = Operations().getValue( "Transformations/DataProcessing", self.dataProcTTypes) self.dataManipTTypes = Operations().getValue( "Transformations/DataManipulation", self.dataManipTTypes) agentTSTypes = self.am_getOption("TransformationTypes", []) if agentTSTypes: self.transformationTypes = sorted(agentTSTypes) else: self.transformationTypes = sorted(self.dataProcTTypes + self.dataManipTTypes) self.log.info("Will consider the following transformation types: %s" % str(self.transformationTypes)) # # directory locations self.directoryLocations = sorted( self.am_getOption("DirectoryLocations", self.directoryLocations)) self.log.info( "Will search for directories in the following locations: %s" % str(self.directoryLocations)) # # transformation metadata self.transfidmeta = self.am_getOption("TransfIDMeta", self.transfidmeta) self.log.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta) # # archive periof in days self.archiveAfter = self.am_getOption("ArchiveAfter", self.archiveAfter) # days self.log.info("Will archive Completed transformations after %d days" % self.archiveAfter) # # transformation log SEs self.logSE = Operations().getValue("/LogStorage/LogSE", self.logSE) self.log.info("Will remove logs found on storage element: %s" % self.logSE) # # transformation client self.transClient = TransformationClient() # # wms client self.wmsClient = WMSClient() # # request client self.reqClient = ReqClient() # # file catalog client self.metadataClient = FileCatalogClient() # # job monitoring client self.jobMonitoringClient = JobMonitoringClient() return S_OK() ############################################################################# def execute(self): """execution in one agent's cycle :param self: self reference """ self.enableFlag = self.am_getOption("EnableFlag", self.enableFlag) if self.enableFlag != "True": self.log.info( "TransformationCleaningAgent is disabled by configuration option EnableFlag" ) return S_OK("Disabled via CS 
flag") # Obtain the transformations in Cleaning status and remove any mention of the jobs/files res = self.transClient.getTransformations({ "Status": "Cleaning", "Type": self.transformationTypes }) if res["OK"]: for transDict in res["Value"]: if self.shifterProxy: self._executeClean(transDict) else: self.log.info( "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeClean)( transDict, proxyUserDN=transDict["AuthorDN"], proxyUserGroup=transDict["AuthorGroup"]) else: self.log.error("Failed to get transformations", res["Message"]) # Obtain the transformations in RemovingFiles status and removes the output files res = self.transClient.getTransformations({ "Status": "RemovingFiles", "Type": self.transformationTypes }) if res["OK"]: for transDict in res["Value"]: if self.shifterProxy: self._executeRemoval(transDict) else: self.log.info( "Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeRemoval)( transDict, proxyUserDN=transDict["AuthorDN"], proxyUserGroup=transDict["AuthorGroup"]) else: self.log.error("Could not get the transformations", res["Message"]) # Obtain the transformations in Completed status and archive if inactive for X days olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter) res = self.transClient.getTransformations( { "Status": "Completed", "Type": self.transformationTypes }, older=olderThanTime, timeStamp="LastUpdate") if res["OK"]: for transDict in res["Value"]: if self.shifterProxy: self._executeArchive(transDict) else: self.log.info( "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeArchive)( transDict, proxyUserDN=transDict["AuthorDN"], proxyUserGroup=transDict["AuthorGroup"]) else: self.log.error("Could not get the transformations", res["Message"]) return S_OK() def finalize(self): """Only at finalization: will clean ancient transformations (remnants) 1) get the transformation IDs of jobs that are older than 1 year 2) find the status of those transformations. Those "Cleaned" and "Archived" will be cleaned and archived (again) Why doing this here? Basically, it's a race: 1) the production manager submits a transformation 2) the TransformationAgent, and a bit later the WorkflowTaskAgent, put such transformation in their internal queue, so eventually during their (long-ish) cycle they'll work on it. 3) 1 minute after creating the transformation, the production manager cleans it (by hand, for whatever reason). So, the status is changed to "Cleaning" 4) the TransformationCleaningAgent cleans what has been created (maybe, nothing), then sets the transformation status to "Cleaned" or "Archived" 5) a bit later the TransformationAgent, and later the WorkflowTaskAgent, kick in, creating tasks and jobs for a production that's effectively cleaned (but these 2 agents don't know yet). Of course, one could make one final check in TransformationAgent or WorkflowTaskAgent, but these 2 agents are already doing a lot of stuff, and are pretty heavy. So, we should just clean from time to time. What I added here is done only when the agent finalize, and it's quite light-ish operation anyway. 
""" res = self.jobMonitoringClient.getJobGroups( None, datetime.utcnow() - timedelta(days=365)) if not res["OK"]: self.log.error("Failed to get job groups", res["Message"]) return res transformationIDs = res["Value"] if transformationIDs: res = self.transClient.getTransformations( {"TransformationID": transformationIDs}) if not res["OK"]: self.log.error("Failed to get transformations", res["Message"]) return res transformations = res["Value"] toClean = [] toArchive = [] for transDict in transformations: if transDict["Status"] == "Cleaned": toClean.append(transDict) if transDict["Status"] == "Archived": toArchive.append(transDict) for transDict in toClean: if self.shifterProxy: self._executeClean(transDict) else: self.log.info( "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeClean)( transDict, proxyUserDN=transDict["AuthorDN"], proxyUserGroup=transDict["AuthorGroup"]) for transDict in toArchive: if self.shifterProxy: self._executeArchive(transDict) else: self.log.info( "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" % transDict) executeWithUserProxy(self._executeArchive)( transDict, proxyUserDN=transDict["AuthorDN"], proxyUserGroup=transDict["AuthorGroup"]) # Remove JobIDs that were unknown to the TransformationSystem jobGroupsToCheck = [ str(transDict["TransformationID"]).zfill(8) for transDict in toClean + toArchive ] res = self.jobMonitoringClient.getJobs( {"JobGroup": jobGroupsToCheck}) if not res["OK"]: return res jobIDsToRemove = [int(jobID) for jobID in res["Value"]] res = self.__removeWMSTasks(jobIDsToRemove) if not res["OK"]: return res return S_OK() def _executeClean(self, transDict): """Clean transformation.""" # if transformation is of type `Replication` or `Removal`, there is nothing to clean. # We just archive if transDict["Type"] in self.dataManipTTypes: res = self.archiveTransformation(transDict["TransformationID"]) if not res["OK"]: self.log.error( "Problems archiving transformation", "%s: %s" % (transDict["TransformationID"], res["Message"])) else: res = self.cleanTransformation(transDict["TransformationID"]) if not res["OK"]: self.log.error( "Problems cleaning transformation", "%s: %s" % (transDict["TransformationID"], res["Message"])) def _executeRemoval(self, transDict): """Remove files from given transformation.""" res = self.removeTransformationOutput(transDict["TransformationID"]) if not res["OK"]: self.log.error( "Problems removing transformation", "%s: %s" % (transDict["TransformationID"], res["Message"])) def _executeArchive(self, transDict): """Archive the given transformation.""" res = self.archiveTransformation(transDict["TransformationID"]) if not res["OK"]: self.log.error( "Problems archiving transformation", "%s: %s" % (transDict["TransformationID"], res["Message"])) return S_OK() ############################################################################# # # Get the transformation directories for checking # def getTransformationDirectories(self, transID): """get the directories for the supplied transformation from the transformation system. These directories are used by removeTransformationOutput and cleanTransformation for removing output. 
:param self: self reference :param int transID: transformation ID """ self.log.verbose( "Cleaning Transformation directories of transformation %d" % transID) directories = [] if "TransformationDB" in self.directoryLocations: res = self.transClient.getTransformationParameters( transID, ["OutputDirectories"]) if not res["OK"]: self.log.error("Failed to obtain transformation directories", res["Message"]) return res transDirectories = [] if res["Value"]: if not isinstance(res["Value"], list): try: transDirectories = ast.literal_eval(res["Value"]) except Exception: # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]' transDirectories.append(res["Value"]) else: transDirectories = res["Value"] directories = self._addDirs(transID, transDirectories, directories) if "MetadataCatalog" in self.directoryLocations: res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta: transID}) if not res["OK"]: self.log.error("Failed to obtain metadata catalog directories", res["Message"]) return res transDirectories = res["Value"] directories = self._addDirs(transID, transDirectories, directories) if not directories: self.log.info("No output directories found") directories = sorted(directories) return S_OK(directories) @classmethod def _addDirs(cls, transID, newDirs, existingDirs): """append unique :newDirs: list to :existingDirs: list :param self: self reference :param int transID: transformationID :param list newDirs: src list of paths :param list existingDirs: dest list of paths """ for folder in newDirs: transStr = str(transID).zfill(8) if re.search(transStr, str(folder)): if folder not in existingDirs: existingDirs.append(os.path.normpath(folder)) return existingDirs ############################################################################# # # These are the methods for performing the cleaning of catalogs and storage # def cleanContent(self, directory): """wipe out everything from catalog under folder :directory: :param self: self reference :params str directory: folder name """ self.log.verbose("Cleaning Catalog contents") res = self.__getCatalogDirectoryContents([directory]) if not res["OK"]: return res filesFound = res["Value"] if not filesFound: self.log.info( "No files are registered in the catalog directory %s" % directory) return S_OK() self.log.info( "Attempting to remove possible remnants from the catalog and storage", "(n=%d)" % len(filesFound)) # Executing with shifter proxy gConfigurationData.setOptionInCFG( "/DIRAC/Security/UseServerCertificate", "false") res = DataManager().removeFile(filesFound, force=True) gConfigurationData.setOptionInCFG( "/DIRAC/Security/UseServerCertificate", "true") if not res["OK"]: return res realFailure = False for lfn, reason in res["Value"]["Failed"].items(): if "File does not exist" in str(reason): self.log.warn("File %s not found in some catalog: " % (lfn)) else: self.log.error("Failed to remove file found in the catalog", "%s %s" % (lfn, reason)) realFailure = True if realFailure: return S_ERROR("Failed to remove all files found in the catalog") return S_OK() def __getCatalogDirectoryContents(self, directories): """get catalog contents under paths :directories: :param self: self reference :param list directories: list of paths in catalog """ self.log.info("Obtaining the catalog contents for %d directories:" % len(directories)) for directory in directories: self.log.info(directory) activeDirs = directories allFiles = {} fc = FileCatalog() while activeDirs: currentDir = activeDirs[0] res = 
returnSingleResult(fc.listDirectory(currentDir)) activeDirs.remove(currentDir) if not res["OK"] and "Directory does not exist" in res[ "Message"]: # FIXME: DFC should return errno self.log.info("The supplied directory %s does not exist" % currentDir) elif not res["OK"]: if "No such file or directory" in res["Message"]: self.log.info("%s: %s" % (currentDir, res["Message"])) else: self.log.error( "Failed to get directory %s content" % currentDir, res["Message"]) else: dirContents = res["Value"] activeDirs.extend(dirContents["SubDirs"]) allFiles.update(dirContents["Files"]) self.log.info("", "Found %d files" % len(allFiles)) return S_OK(list(allFiles)) def cleanTransformationLogFiles(self, directory): """clean up transformation logs from directory :directory: :param self: self reference :param str directory: folder name """ self.log.verbose("Removing log files found in the directory", directory) res = returnSingleResult( StorageElement(self.logSE).removeDirectory(directory, recursive=True)) if not res["OK"]: if cmpError(res, errno.ENOENT): # No such file or directory self.log.warn("Transformation log directory does not exist", directory) return S_OK() self.log.error("Failed to remove log files", res["Message"]) return res self.log.info("Successfully removed transformation log directory") return S_OK() ############################################################################# # # These are the functional methods for archiving and cleaning transformations # def removeTransformationOutput(self, transID): """This just removes any mention of the output data from the catalog and storage""" self.log.info("Removing output data for transformation %s" % transID) res = self.getTransformationDirectories(transID) if not res["OK"]: self.log.error("Problem obtaining directories for transformation", "%s with result '%s'" % (transID, res)) return S_OK() directories = res["Value"] for directory in directories: if not re.search("/LOG/", directory): res = self.cleanContent(directory) if not res["OK"]: return res self.log.info("Removed %d directories from the catalog \ and its files from the storage for transformation %s" % (len(directories), transID)) # Clean ALL the possible remnants found in the metadata catalog res = self.cleanMetadataCatalogFiles(transID) if not res["OK"]: return res self.log.info("Successfully removed output of transformation", transID) # Change the status of the transformation to RemovedFiles res = self.transClient.setTransformationParameter( transID, "Status", "RemovedFiles") if not res["OK"]: self.log.error( "Failed to update status of transformation %s to RemovedFiles" % (transID), res["Message"]) return res self.log.info("Updated status of transformation %s to RemovedFiles" % (transID)) return S_OK() def archiveTransformation(self, transID): """This just removes job from the jobDB and the transformation DB :param self: self reference :param int transID: transformation ID """ self.log.info("Archiving transformation %s" % transID) # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks(transID) if not res["OK"]: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation(transID) if not res["OK"]: return res self.log.info("Successfully archived transformation %d" % transID) # Change the status of the transformation to archived res = self.transClient.setTransformationParameter( transID, "Status", "Archived") if not res["OK"]: self.log.error( "Failed to update status of transformation %s to 
Archived" % (transID), res["Message"]) return res self.log.info("Updated status of transformation %s to Archived" % (transID)) return S_OK() def cleanTransformation(self, transID): """This removes what was produced by the supplied transformation, leaving only some info and log in the transformation DB. """ self.log.info("Cleaning transformation", transID) res = self.getTransformationDirectories(transID) if not res["OK"]: self.log.error("Problem obtaining directories for transformation", "%s with result '%s'" % (transID, res["Message"])) return S_OK() directories = res["Value"] # Clean the jobs in the WMS and any failover requests found res = self.cleanTransformationTasks(transID) if not res["OK"]: return res # Clean the log files for the jobs for directory in directories: if re.search("/LOG/", directory): res = self.cleanTransformationLogFiles(directory) if not res["OK"]: return res res = self.cleanContent(directory) if not res["OK"]: return res # Clean ALL the possible remnants found res = self.cleanMetadataCatalogFiles(transID) if not res["OK"]: return res # Clean the transformation DB of the files and job information res = self.transClient.cleanTransformation(transID) if not res["OK"]: return res self.log.info("Successfully cleaned transformation", transID) res = self.transClient.setTransformationParameter( transID, "Status", "Cleaned") if not res["OK"]: self.log.error( "Failed to update status of transformation %s to Cleaned" % (transID), res["Message"]) return res self.log.info("Updated status of transformation", "%s to Cleaned" % (transID)) return S_OK() def cleanMetadataCatalogFiles(self, transID): """wipe out files from catalog""" res = self.metadataClient.findFilesByMetadata( {self.transfidmeta: transID}) if not res["OK"]: return res fileToRemove = res["Value"] if not fileToRemove: self.log.info("No files found for transID", transID) return S_OK() # Executing with shifter proxy gConfigurationData.setOptionInCFG( "/DIRAC/Security/UseServerCertificate", "false") res = DataManager().removeFile(fileToRemove, force=True) gConfigurationData.setOptionInCFG( "/DIRAC/Security/UseServerCertificate", "true") if not res["OK"]: return res for lfn, reason in res["Value"]["Failed"].items(): self.log.error("Failed to remove file found in metadata catalog", "%s %s" % (lfn, reason)) if res["Value"]["Failed"]: return S_ERROR( "Failed to remove all files found in the metadata catalog") self.log.info("Successfully removed all files found in the DFC") return S_OK() ############################################################################# # # These are the methods for removing the jobs from the WMS and transformation DB # def cleanTransformationTasks(self, transID): """clean tasks from WMS, or from the RMS if it is a DataManipulation transformation""" self.log.verbose("Cleaning Transformation tasks of transformation", transID) res = self.__getTransformationExternalIDs(transID) if not res["OK"]: return res externalIDs = res["Value"] if externalIDs: res = self.transClient.getTransformationParameters( transID, ["Type"]) if not res["OK"]: self.log.error("Failed to determine transformation type") return res transType = res["Value"] if transType in self.dataProcTTypes: res = self.__removeWMSTasks(externalIDs) else: res = self.__removeRequests(externalIDs) if not res["OK"]: return res return S_OK() def __getTransformationExternalIDs(self, transID): """collect all ExternalIDs for transformation :transID: :param self: self reference :param int transID: transforamtion ID """ res = 
self.transClient.getTransformationTasks( condDict={"TransformationID": transID}) if not res["OK"]: self.log.error( "Failed to get externalIDs for transformation %d" % transID, res["Message"]) return res externalIDs = [taskDict["ExternalID"] for taskDict in res["Value"]] self.log.info("Found %d tasks for transformation" % len(externalIDs)) return S_OK(externalIDs) def __removeRequests(self, requestIDs): """This will remove requests from the RMS system -""" rIDs = [int(int(j)) for j in requestIDs if int(j)] for reqID in rIDs: self.reqClient.cancelRequest(reqID) return S_OK() def __removeWMSTasks(self, transJobIDs): """delete jobs (mark their status as "JobStatus.DELETED") and their requests from the system :param self: self reference :param list trasnJobIDs: job IDs """ # Prevent 0 job IDs jobIDs = [int(j) for j in transJobIDs if int(j)] allRemove = True for jobList in breakListIntoChunks(jobIDs, 500): res = self.wmsClient.killJob(jobList) if res["OK"]: self.log.info("Successfully killed %d jobs from WMS" % len(jobList)) elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs" not in res) and ("FailedJobIDs" not in res): self.log.info("Found jobs which did not exist in the WMS", "(n=%d)" % len(res["InvalidJobIDs"])) elif "NonauthorizedJobIDs" in res: self.log.error("Failed to kill jobs because not authorized", "(n=%d)" % len(res["NonauthorizedJobIDs"])) allRemove = False elif "FailedJobIDs" in res: self.log.error("Failed to kill jobs", "(n=%d)" % len(res["FailedJobIDs"])) allRemove = False res = self.wmsClient.deleteJob(jobList) if res["OK"]: self.log.info("Successfully deleted jobs from WMS", "(n=%d)" % len(jobList)) elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs" not in res) and ("FailedJobIDs" not in res): self.log.info("Found jobs which did not exist in the WMS", "(n=%d)" % len(res["InvalidJobIDs"])) elif "NonauthorizedJobIDs" in res: self.log.error("Failed to delete jobs because not authorized", "(n=%d)" % len(res["NonauthorizedJobIDs"])) allRemove = False elif "FailedJobIDs" in res: self.log.error("Failed to delete jobs", "(n=%d)" % len(res["FailedJobIDs"])) allRemove = False if not allRemove: return S_ERROR("Failed to delete all remnants from WMS") self.log.info("Successfully deleted all tasks from the WMS") if not jobIDs: self.log.info( "JobIDs not present, unable to delete associated requests.") return S_OK() failed = 0 failoverRequests = {} res = self.reqClient.getRequestIDsForJobs(jobIDs) if not res["OK"]: self.log.error("Failed to get requestID for jobs.", res["Message"]) return res failoverRequests.update(res["Value"]["Successful"]) if not failoverRequests: return S_OK() for jobID, requestID in res["Value"]["Successful"].items(): # Put this check just in case, tasks must have associated jobs if jobID == 0 or jobID == "0": continue res = self.reqClient.cancelRequest(requestID) if not res["OK"]: self.log.error("Failed to remove request from RequestDB", res["Message"]) failed += 1 else: self.log.verbose("Removed request %s associated to job %d." % (requestID, jobID)) if failed: self.log.info("Successfully removed requests", "(n=%d)" % (len(failoverRequests) - failed)) self.log.info("Failed to remove requests", "(n=%d)" % failed) return S_ERROR("Failed to remove all the request from RequestDB") self.log.info( "Successfully removed all the associated failover requests") return S_OK()
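A small, hedged sketch of the JobGroup convention the agent relies on above: jobs belonging to a transformation carry a JobGroup equal to the transformation ID zero-padded to eight digits, which is how finalize() and _addDirs() match jobs and directories to transformations. The transformation ID below is an arbitrary example; the client call mirrors the one used in finalize().

# Sketch only: transformation ID 1234 is an example; the zfill(8) padding matches
# the JobGroup convention used in finalize() and _addDirs() above.
from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient

transID = 1234
jobGroup = str(transID).zfill(8)  # -> '00001234'
res = JobMonitoringClient().getJobs({"JobGroup": jobGroup})
if res["OK"]:
    jobIDs = [int(jobID) for jobID in res["Value"]]  # candidates for WMS cleanup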
class Transformation(API): ############################################################################# def __init__(self, transID=0, transClient=None): """ c'tor """ super(Transformation, self).__init__() self.paramTypes = { 'TransformationID': [types.IntType, types.LongType], 'TransformationName': types.StringTypes, 'Status': types.StringTypes, 'Description': types.StringTypes, 'LongDescription': types.StringTypes, 'Type': types.StringTypes, 'Plugin': types.StringTypes, 'AgentType': types.StringTypes, 'FileMask': types.StringTypes, 'TransformationGroup': types.StringTypes, 'GroupSize': [types.IntType, types.LongType, types.FloatType], 'InheritedFrom': [types.IntType, types.LongType], 'Body': types.StringTypes, 'MaxNumberOfTasks': [types.IntType, types.LongType], 'EventsPerTask': [types.IntType, types.LongType] } self.paramValues = { 'TransformationID': 0, 'TransformationName': '', 'Status': 'New', 'Description': '', 'LongDescription': '', 'Type': '', 'Plugin': 'Standard', 'AgentType': 'Manual', 'FileMask': '', 'TransformationGroup': 'General', 'GroupSize': 1, 'InheritedFrom': 0, 'Body': '', 'MaxNumberOfTasks': 0, 'EventsPerTask': 0 } self.ops = Operations() self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins', ['Broadcast', 'Standard', 'BySize', 'ByShare']) if not transClient: self.transClient = TransformationClient() else: self.transClient = transClient self.serverURL = self.transClient.getServer() self.exists = False if transID: self.paramValues['TransformationID'] = transID res = self.getTransformation() if res['OK']: self.exists = True elif res['Message'] == 'Transformation does not exist': raise AttributeError, 'TransformationID %d does not exist' % transID else: self.paramValues['TransformationID'] = 0 gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % (transID, self.transClient.serverURL)) def setServer(self, server): self.serverURL = server self.transClient.setServer(self.serverURL) def getServer(self): return self.serverURL def reset(self, transID=0): self.__init__(transID) self.transClient.setServer(self.serverURL) return S_OK() def setTargetSE(self, seList): return self.__setSE('TargetSE', seList) def setSourceSE(self, seList): return self.__setSE('SourceSE', seList) def __setSE(self, se, seList): if type(seList) in types.StringTypes: try: seList = eval(seList) except: seList = seList.replace(',', ' ').split() res = self.__checkSEs(seList) if not res['OK']: return res self.item_called = se return self.__setParam(seList) def __getattr__(self, name): if name.find('get') == 0: item = name[3:] self.item_called = item return self.__getParam if name.find('set') == 0: item = name[3:] self.item_called = item return self.__setParam raise AttributeError, name def __getParam(self): if self.item_called == 'Available': return S_OK(self.paramTypes.keys()) if self.item_called == 'Parameters': return S_OK(self.paramValues) if self.item_called in self.paramValues: return S_OK(self.paramValues[self.item_called]) raise AttributeError, "Unknown parameter for transformation: %s" % self.item_called def __setParam(self, value): change = False if self.item_called in self.paramTypes: oldValue = self.paramValues[self.item_called] if oldValue != value: if type(value) in self.paramTypes[self.item_called]: change = True else: raise TypeError, "%s %s %s expected one of %s" % ( self.item_called, value, type(value), self.paramTypes[self.item_called]) if not self.item_called in self.paramTypes.keys(): if not self.paramValues.has_key(self.item_called): change = 
True else: oldValue = self.paramValues[self.item_called] if oldValue != value: change = True if not change: gLogger.verbose("No change of parameter %s required" % self.item_called) else: gLogger.verbose("Parameter %s to be changed" % self.item_called) transID = self.paramValues['TransformationID'] if self.exists and transID: res = self.transClient.setTransformationParameter( transID, self.item_called, value) if not res['OK']: return res self.paramValues[self.item_called] = value return S_OK() def getTransformation(self, printOutput=False): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() res = self.transClient.getTransformation(transID, extraParams=True) if not res['OK']: if printOutput: self._prettyPrint(res) return res transParams = res['Value'] for paramName, paramValue in transParams.items(): setter = None setterName = "set%s" % paramName if hasattr(self, setterName) and callable(getattr( self, setterName)): setter = getattr(self, setterName) if not setterName: gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName) continue setter(paramValue) if printOutput: gLogger.info("No printing available yet") return S_OK(transParams) def getTransformationLogging(self, printOutput=False): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() res = self.transClient.getTransformationLogging(transID) if not res['OK']: if printOutput: self._prettyPrint(res) return res loggingList = res['Value'] if printOutput: self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate') return S_OK(loggingList) def extendTransformation(self, nTasks, printOutput=False): return self.__executeOperation('extendTransformation', nTasks, printOutput=printOutput) def cleanTransformation(self, printOutput=False): res = self.__executeOperation('cleanTransformation', printOutput=printOutput) if res['OK']: self.paramValues['Status'] = 'Cleaned' return res def deleteTransformation(self, printOutput=False): res = self.__executeOperation('deleteTransformation', printOutput=printOutput) if res['OK']: self.reset() return res def addFilesToTransformation(self, lfns, printOutput=False): return self.__executeOperation('addFilesToTransformation', lfns, printOutput=printOutput) def setFileStatusForTransformation(self, status, lfns, printOutput=False): return self.__executeOperation('setFileStatusForTransformation', status, lfns, printOutput=printOutput) def getTransformationTaskStats(self, printOutput=False): return self.__executeOperation('getTransformationTaskStats', printOutput=printOutput) def getTransformationStats(self, printOutput=False): return self.__executeOperation('getTransformationStats', printOutput=printOutput) def deleteTasks(self, taskMin, taskMax, printOutput=False): return self.__executeOperation('deleteTasks', taskMin, taskMax, printOutput=printOutput) def addTaskForTransformation(self, lfns=[], se='Unknown', printOutput=False): return self.__executeOperation('addTaskForTransformation', lfns, se, printOutput=printOutput) def setTaskStatus(self, taskID, status, printOutput=False): return self.__executeOperation('setTaskStatus', taskID, status, printOutput=printOutput) def __executeOperation(self, operation, *parms, **kwds): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() printOutput = kwds.pop('printOutput') fcn = None if 
hasattr(self.transClient, operation) and callable( getattr(self.transClient, operation)): fcn = getattr(self.transClient, operation) if not fcn: return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" ) res = fcn(transID, *parms, **kwds) if printOutput: self._prettyPrint(res) return res def getTransformationFiles(self, fileStatus=[], lfns=[], outputFields=[ 'FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate' ], orderBy='FileID', printOutput=False): condDict = {'TransformationID': self.paramValues['TransformationID']} if fileStatus: condDict['Status'] = fileStatus if lfns: condDict['LFN'] = lfns res = self.transClient.getTransformationFiles(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'FileID', orderBy) return res def getTransformationTasks(self, taskStatus=[], taskIDs=[], outputFields=[ 'TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID', 'TargetSE', 'CreationTime', 'LastUpdateTime' ], orderBy='TaskID', printOutput=False): condDict = {'TransformationID': self.paramValues['TransformationID']} if taskStatus: condDict['ExternalStatus'] = taskStatus if taskIDs: condDict['TaskID'] = taskIDs res = self.transClient.getTransformationTasks(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'TaskID', orderBy) return res ############################################################################# def getTransformations(self, transID=[], transStatus=[], outputFields=[ 'TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate' ], orderBy='TransformationID', printOutput=False): condDict = {} if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'TransformationID', orderBy) return res ############################################################################# def addTransformation(self, addFiles=True, printOutput=False): res = self._checkCreation() if not res['OK']: return self._errorReport(res, 'Failed transformation sanity check') if printOutput: gLogger.info( "Will attempt to create transformation with the following parameters" ) self._prettyPrint(self.paramValues) res = self.transClient.addTransformation( self.paramValues['TransformationName'], self.paramValues['Description'], self.paramValues['LongDescription'], self.paramValues['Type'], self.paramValues['Plugin'], self.paramValues['AgentType'], self.paramValues['FileMask'], transformationGroup=self.paramValues['TransformationGroup'], groupSize=self.paramValues['GroupSize'], inheritedFrom=self.paramValues['InheritedFrom'], body=self.paramValues['Body'], 
maxTasks=self.paramValues['MaxNumberOfTasks'], eventsPerTask=self.paramValues['EventsPerTask'], addFiles=addFiles) if not res['OK']: if printOutput: self._prettyPrint(res) return res transID = res['Value'] self.exists = True self.setTransformationID(transID) gLogger.notice("Created transformation %d" % transID) for paramName, paramValue in self.paramValues.items(): if not self.paramTypes.has_key(paramName): res = self.transClient.setTransformationParameter( transID, paramName, paramValue) if not res['OK']: gLogger.error("Failed to add parameter", "%s %s" % (paramName, res['Message'])) gLogger.notice( "To add this parameter later please execute the following." ) gLogger.notice("oTransformation = Transformation(%d)" % transID) gLogger.notice("oTransformation.set%s(...)" % paramName) return S_OK(transID) def _checkCreation(self): if self.paramValues['TransformationID']: gLogger.info( "You are currently working with an active transformation definition." ) gLogger.info( "If you wish to create a new transformation reset the TransformationID." ) gLogger.info("oTransformation.reset()") return S_ERROR() requiredParameters = [ 'TransformationName', 'Description', 'LongDescription', 'Type' ] for parameter in requiredParameters: if not self.paramValues[parameter]: gLogger.info( "%s is not defined for this transformation. This is required..." % parameter) self.paramValues[parameter] = raw_input( "Please enter the value of " + parameter + " ") plugin = self.paramValues['Plugin'] if not plugin in self.supportedPlugins: gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin) res = self.__promptForParameter('Plugin', choices=self.supportedPlugins, default='Standard') if not res['OK']: return res self.paramValues['Plugin'] = res['Value'] plugin = self.paramValues['Plugin'] #checkPlugin = "_check%sPlugin" % plugin #fcn = None #if hasattr( self, checkPlugin ) and callable( getattr( self, checkPlugin ) ): # fcn = getattr( self, checkPlugin ) #if not fcn: # return S_ERROR( "Unable to invoke %s, it isn't a member function" % checkPlugin ) #res = fcn() return S_OK() def _checkBySizePlugin(self): return self._checkStandardPlugin() def _checkBySharePlugin(self): return self._checkStandardPlugin() def _checkStandardPlugin(self): groupSize = self.paramValues['GroupSize'] if (groupSize <= 0): gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." ) res = self.setGroupSize(1) if not res['OK']: return res return S_OK() def _checkBroadcastPlugin(self): gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % (', '.join(['SourceSE', 'TargetSE']))) requiredParams = ['SourceSE', 'TargetSE'] for requiredParam in requiredParams: if (not self.paramValues.has_key(requiredParam)) or ( not self.paramValues[requiredParam]): paramValue = raw_input("Please enter " + requiredParam + " ") setter = None setterName = "set%s" % requiredParam if hasattr(self, setterName) and callable( getattr(self, setterName)): setter = getattr(self, setterName) if not setter: return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." 
% setterName) ses = paramValue.replace(',', ' ').split() res = setter(ses) if not res['OK']: return res return S_OK() def __checkSEs(self, seList): res = gConfig.getSections('/Resources/StorageElements') if not res['OK']: return self._errorReport(res, 'Failed to get possible StorageElements') missing = [] for se in seList: if not se in res['Value']: gLogger.error("StorageElement %s is not known" % se) missing.append(se) if missing: return S_ERROR("%d StorageElements not known" % len(missing)) return S_OK() def __promptForParameter(self, parameter, choices=[], default='', insert=True): res = promptUser("Please enter %s" % parameter, choices=choices, default=default) if not res['OK']: return self._errorReport(res) gLogger.notice("%s will be set to '%s'" % (parameter, res['Value'])) paramValue = res['Value'] if insert: setter = None setterName = "set%s" % parameter if hasattr(self, setterName) and callable(getattr( self, setterName)): setter = getattr(self, setterName) if not setter: return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" % setterName ) res = setter(paramValue) if not res['OK']: return res return S_OK(paramValue)
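As a quick illustration of the __getattr__ dispatch in the class above: every get<Parameter>/set<Parameter> call is routed to __getParam/__setParam, so parameters can be read and written without dedicated accessors. The values below are purely illustrative, and the import path is the standard DIRAC one, assumed here for self-containment.

# Sketch: none of these accessors is defined explicitly; __getattr__ synthesises
# them from the parameter names. Values are examples only.
from DIRAC.TransformationSystem.Client.Transformation import Transformation

trans = Transformation()
trans.setTransformationName("Example_Transformation")
trans.setType("Replication")
trans.setPlugin("Broadcast")            # checked against Transformations/AllowedPlugins at creation time
print(trans.getPlugin()['Value'])       # -> 'Broadcast' (wrapped in S_OK)
print(trans.getAvailable()['Value'])    # list of known parameter names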
gLogger.error("Production ID is 0 or Tasks is 0, cannot be") dexit(1) from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient tc = TransformationClient() res = tc.getTransformation(clip.prod) trans= res['Value'] transp = trans['Plugin'] if transp != 'Limited': gLogger.error("This cannot be used on productions that are not using the 'Limited' plugin") dexit(0) gLogger.info("Prod %s has %s tasks registered" % (clip.prod, trans['MaxNumberOfTasks']) ) if clip.tasks >0: max_tasks = trans['MaxNumberOfTasks'] + clip.tasks groupsize = trans['GroupSize'] gLogger.notice("Adding %s tasks (%s file(s)) to production %s" %(clip.tasks, clip.tasks*groupsize, clip.prod)) elif clip.tasks <0: max_tasks = -1 gLogger.notice("Now all existing files in the DB for production %s will be processed." % clip.prod) else: gLogger.error("Number of tasks must be different from 0") dexit(1) res = tc.setTransformationParameter(clip.prod, 'MaxNumberOfTasks', max_tasks) if not res['OK']: gLogger.error(res['Message']) dexit(1) gLogger.notice("Production %s extended!" % clip.prod) dexit(0)
class Transformation(API): ############################################################################# def __init__(self, transID=0, transClient=None): """ c'tor """ super(Transformation, self).__init__() self.paramTypes = { 'TransformationID': [types.IntType, types.LongType], 'TransformationName': types.StringTypes, 'Status': types.StringTypes, 'Description': types.StringTypes, 'LongDescription': types.StringTypes, 'Type': types.StringTypes, 'Plugin': types.StringTypes, 'AgentType': types.StringTypes, 'FileMask': types.StringTypes, 'TransformationGroup': types.StringTypes, 'GroupSize': [types.IntType, types.LongType, types.FloatType], 'InheritedFrom': [types.IntType, types.LongType], 'Body': types.StringTypes, 'MaxNumberOfTasks': [types.IntType, types.LongType], 'EventsPerTask': [types.IntType, types.LongType] } self.paramValues = { 'TransformationID': 0, 'TransformationName': '', 'Status': 'New', 'Description': '', 'LongDescription': '', 'Type': '', 'Plugin': 'Standard', 'AgentType': 'Manual', 'FileMask': '', 'TransformationGroup': 'General', 'GroupSize': 1, 'InheritedFrom': 0, 'Body': '', 'MaxNumberOfTasks': 0, 'EventsPerTask': 0 } self.ops = Operations() self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins', ['Broadcast', 'Standard', 'BySize', 'ByShare']) if not transClient: self.transClient = TransformationClient() else: self.transClient = transClient self.serverURL = self.transClient.getServer() self.exists = False if transID: self.paramValues['TransformationID'] = transID res = self.getTransformation() if res['OK']: self.exists = True elif res['Message'] == 'Transformation does not exist': raise AttributeError('TransformationID %d does not exist' % transID) else: self.paramValues['TransformationID'] = 0 gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % (transID, self.transClient.serverURL)) def setServer(self, server): self.serverURL = server self.transClient.setServer(self.serverURL) def getServer(self): return self.serverURL def reset(self, transID=0): self.__init__(transID) self.transClient.setServer(self.serverURL) return S_OK() def setTargetSE(self, seList): return self.__setSE('TargetSE', seList) def setSourceSE(self, seList): return self.__setSE('SourceSE', seList) def setBody(self, body): """ check that the body is a string, or using the proper syntax for multiple operations :param body: transformation body, for example .. 
code :: python body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }), ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ), ] :type body: string or list of tuples (or lists) of string and dictionaries :raises TypeError: If the structure is not as expected :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation` is used :returns: S_OK, S_ERROR """ self.item_called = "Body" if isinstance(body, basestring): return self.__setParam(body) if not isinstance(body, (list, tuple)): raise TypeError("Expected list or string, but %r is %s" % (body, type(body))) for tup in body: if not isinstance(tup, (tuple, list)): raise TypeError("Expected tuple or list, but %r is %s" % (tup, type(tup))) if len(tup) != 2: raise TypeError("Expected 2-tuple, but %r is length %d" % (tup, len(tup))) if not isinstance(tup[0], basestring): raise TypeError( "Expected string, but first entry in tuple %r is %s" % (tup, type(tup[0]))) if not isinstance(tup[1], dict): raise TypeError( "Expected dictionary, but second entry in tuple %r is %s" % (tup, type(tup[0]))) for par, val in tup[1].iteritems(): if not isinstance(par, basestring): raise TypeError( "Expected string, but key in dictionary %r is %s" % (par, type(par))) if not par in Operation.ATTRIBUTE_NAMES: raise ValueError("Unknown attribute for Operation: %s" % par) if not isinstance( val, (basestring, int, long, float, list, tuple, dict)): raise TypeError("Cannot encode %r, in json" % (val)) return self.__setParam(json.dumps(body)) def __setSE(self, seParam, seList): if isinstance(seList, basestring): try: seList = eval(seList) except BaseException: seList = seList.split(',') elif isinstance(seList, (list, dict, tuple)): seList = list(seList) else: return S_ERROR("Bad argument type") res = self.__checkSEs(seList) if not res['OK']: return res self.item_called = seParam return self.__setParam(seList) def __getattr__(self, name): if name.find('get') == 0: item = name[3:] self.item_called = item return self.__getParam if name.find('set') == 0: item = name[3:] self.item_called = item return self.__setParam raise AttributeError(name) def __getParam(self): if self.item_called == 'Available': return S_OK(self.paramTypes.keys()) if self.item_called == 'Parameters': return S_OK(self.paramValues) if self.item_called in self.paramValues: return S_OK(self.paramValues[self.item_called]) raise AttributeError("Unknown parameter for transformation: %s" % self.item_called) def __setParam(self, value): change = False if self.item_called in self.paramTypes: if self.paramValues[self.item_called] != value: if type(value) in self.paramTypes[self.item_called]: change = True else: raise TypeError("%s %s %s expected one of %s" % (self.item_called, value, type(value), self.paramTypes[self.item_called])) else: if self.item_called not in self.paramValues: change = True else: if self.paramValues[self.item_called] != value: change = True if not change: gLogger.verbose("No change of parameter %s required" % self.item_called) else: gLogger.verbose("Parameter %s to be changed" % self.item_called) transID = self.paramValues['TransformationID'] if self.exists and transID: res = self.transClient.setTransformationParameter( transID, self.item_called, value) if not res['OK']: return res self.paramValues[self.item_called] = value return S_OK() def getTransformation(self, printOutput=False): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() res = 
self.transClient.getTransformation(transID, extraParams=True) if not res['OK']: if printOutput: self._prettyPrint(res) return res transParams = res['Value'] for paramName, paramValue in transParams.items(): setter = None setterName = "set%s" % paramName if hasattr(self, setterName) and callable(getattr( self, setterName)): setter = getattr(self, setterName) if not setterName: gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName) continue setter(paramValue) if printOutput: gLogger.info("No printing available yet") return S_OK(transParams) def getTransformationLogging(self, printOutput=False): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() res = self.transClient.getTransformationLogging(transID) if not res['OK']: if printOutput: self._prettyPrint(res) return res loggingList = res['Value'] if printOutput: self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate') return S_OK(loggingList) def extendTransformation(self, nTasks, printOutput=False): return self.__executeOperation('extendTransformation', nTasks, printOutput=printOutput) def cleanTransformation(self, printOutput=False): res = self.__executeOperation('cleanTransformation', printOutput=printOutput) if res['OK']: self.paramValues['Status'] = 'Cleaned' return res def deleteTransformation(self, printOutput=False): res = self.__executeOperation('deleteTransformation', printOutput=printOutput) if res['OK']: self.reset() return res def addFilesToTransformation(self, lfns, printOutput=False): return self.__executeOperation('addFilesToTransformation', lfns, printOutput=printOutput) def setFileStatusForTransformation(self, status, lfns, printOutput=False): return self.__executeOperation('setFileStatusForTransformation', status, lfns, printOutput=printOutput) def getTransformationTaskStats(self, printOutput=False): return self.__executeOperation('getTransformationTaskStats', printOutput=printOutput) def getTransformationStats(self, printOutput=False): return self.__executeOperation('getTransformationStats', printOutput=printOutput) def deleteTasks(self, taskMin, taskMax, printOutput=False): return self.__executeOperation('deleteTasks', taskMin, taskMax, printOutput=printOutput) def addTaskForTransformation(self, lfns=[], se='Unknown', printOutput=False): return self.__executeOperation('addTaskForTransformation', lfns, se, printOutput=printOutput) def setTaskStatus(self, taskID, status, printOutput=False): return self.__executeOperation('setTaskStatus', taskID, status, printOutput=printOutput) def __executeOperation(self, operation, *parms, **kwds): transID = self.paramValues['TransformationID'] if not transID: gLogger.fatal("No TransformationID known") return S_ERROR() printOutput = kwds.pop('printOutput') fcn = None if hasattr(self.transClient, operation) and callable( getattr(self.transClient, operation)): fcn = getattr(self.transClient, operation) if not fcn: return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" ) res = fcn(transID, *parms, **kwds) if printOutput: self._prettyPrint(res) return res def getTransformationFiles(self, fileStatus=[], lfns=[], outputFields=[ 'FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate' ], orderBy='FileID', printOutput=False): condDict = {'TransformationID': self.paramValues['TransformationID']} if fileStatus: condDict['Status'] = fileStatus if lfns: condDict['LFN'] 
= lfns res = self.transClient.getTransformationFiles(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'FileID', orderBy) return res def getTransformationTasks(self, taskStatus=[], taskIDs=[], outputFields=[ 'TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID', 'TargetSE', 'CreationTime', 'LastUpdateTime' ], orderBy='TaskID', printOutput=False): condDict = {'TransformationID': self.paramValues['TransformationID']} if taskStatus: condDict['ExternalStatus'] = taskStatus if taskIDs: condDict['TaskID'] = taskIDs res = self.transClient.getTransformationTasks(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'TaskID', orderBy) return res ############################################################################# def getTransformations(self, transID=[], transStatus=[], outputFields=[ 'TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate' ], orderBy='TransformationID', printOutput=False): condDict = {} if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'TransformationID', orderBy) return res ############################################################################# def getAuthorDNfromProxy(self): """ gets the AuthorDN and username of the transformation from the uploaded proxy """ username = "" author = "" res = getProxyInfo() if res['OK']: author = res['Value']['identity'] username = res['Value']['username'] else: gLogger.error("Unable to get uploaded proxy Info %s " % res['Message']) return S_ERROR(res['Message']) res = {'username': username, 'authorDN': author} return S_OK(res) ############################################################################# def getTransformationsByUser(self, authorDN="", userName="", transID=[], transStatus=[], outputFields=[ 'TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate', 'AuthorDN' ], orderBy='TransformationID', printOutput=False): condDict = {} if authorDN == "": res = self.getAuthorDNfromProxy() if not res['OK']: gLogger.error(res['Message']) return S_ERROR(res['Message']) else: foundUserName = res['Value']['username'] foundAuthor = res['Value']['authorDN'] # If the username whom created the uploaded proxy is different than the provided username report error and exit if not (userName == "" or userName == foundUserName): gLogger.error( "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % (userName, foundUserName)) return S_ERROR( "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % (userName, foundUserName)) userName = foundUserName authorDN = 
foundAuthor gLogger.info( "Will list transformations created by user '%s' with status '%s'" % (userName, ', '.join(transStatus))) else: gLogger.info( "Will list transformations created by '%s' with status '%s'" % (authorDN, ', '.join(transStatus))) condDict['AuthorDN'] = authorDN if transID: condDict['TransformationID'] = transID if transStatus: condDict['Status'] = transStatus res = self.transClient.getTransformations(condDict=condDict) if not res['OK']: if printOutput: self._prettyPrint(res) return res if printOutput: if not outputFields: gLogger.info("Available fields are: %s" % res['ParameterNames'].join(' ')) elif not res['Value']: gLogger.info("No tasks found for selection") else: self._printFormattedDictList(res['Value'], outputFields, 'TransformationID', orderBy) return res ############################################################################# def getSummaryTransformations(self, transID=[]): """Show the summary for a list of Transformations Fields starting with 'F' ('J') refers to files (jobs). Proc. stand for processed. """ condDict = {'TransformationID': transID} orderby = [] start = 0 maxitems = len(transID) paramShowNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed', \ 'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting', \ 'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled'] # Below, the header used for each field in the printing: short to fit in one line paramShowNamesShort = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.', \ 'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled'] dictList = [] result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems) if not result['OK']: self._prettyPrint(result) return result if result['Value']['TotalRecords'] > 0: try: paramNames = result['Value']['ParameterNames'] for paramValues in result['Value']['Records']: paramShowValues = map( lambda pname: paramValues[paramNames.index(pname)], paramShowNames) showDict = dict(zip(paramShowNamesShort, paramShowValues)) dictList.append(showDict) except Exception as x: print 'Exception %s ' % str(x) if not len(dictList) > 0: gLogger.error( 'No found transformations satisfying input condition') return S_ERROR( 'No found transformations satisfying input condition') else: print self._printFormattedDictList(dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0]) return S_OK(dictList) ############################################################################# def addTransformation(self, addFiles=True, printOutput=False): res = self._checkCreation() if not res['OK']: return self._errorReport(res, 'Failed transformation sanity check') if printOutput: gLogger.info( "Will attempt to create transformation with the following parameters" ) self._prettyPrint(self.paramValues) res = self.transClient.addTransformation( self.paramValues['TransformationName'], self.paramValues['Description'], self.paramValues['LongDescription'], self.paramValues['Type'], self.paramValues['Plugin'], self.paramValues['AgentType'], self.paramValues['FileMask'], transformationGroup=self.paramValues['TransformationGroup'], groupSize=self.paramValues['GroupSize'], inheritedFrom=self.paramValues['InheritedFrom'], body=self.paramValues['Body'], maxTasks=self.paramValues['MaxNumberOfTasks'], eventsPerTask=self.paramValues['EventsPerTask'], addFiles=addFiles) if not res['OK']: if printOutput: self._prettyPrint(res) return res transID = res['Value'] self.exists = True 
self.setTransformationID(transID) gLogger.notice("Created transformation %d" % transID) for paramName, paramValue in self.paramValues.items(): if paramName not in self.paramTypes: res = self.transClient.setTransformationParameter( transID, paramName, paramValue) if not res['OK']: gLogger.error("Failed to add parameter", "%s %s" % (paramName, res['Message'])) gLogger.notice( "To add this parameter later please execute the following." ) gLogger.notice("oTransformation = Transformation(%d)" % transID) gLogger.notice("oTransformation.set%s(...)" % paramName) return S_OK(transID) def _checkCreation(self): """ Few checks """ if self.paramValues['TransformationID']: gLogger.info( "You are currently working with an active transformation definition." ) gLogger.info( "If you wish to create a new transformation reset the TransformationID." ) gLogger.info("oTransformation.reset()") return S_ERROR() requiredParameters = [ 'TransformationName', 'Description', 'LongDescription', 'Type' ] for parameter in requiredParameters: if not self.paramValues[parameter]: gLogger.info( "%s is not defined for this transformation. This is required..." % parameter) self.paramValues[parameter] = raw_input( "Please enter the value of " + parameter + " ") plugin = self.paramValues['Plugin'] if plugin: if not plugin in self.supportedPlugins: gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin) res = self.__promptForParameter('Plugin', choices=self.supportedPlugins, default='Standard') if not res['OK']: return res self.paramValues['Plugin'] = res['Value'] plugin = self.paramValues['Plugin'] return S_OK() def _checkBySizePlugin(self): return self._checkStandardPlugin() def _checkBySharePlugin(self): return self._checkStandardPlugin() def _checkStandardPlugin(self): groupSize = self.paramValues['GroupSize'] if groupSize <= 0: gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." ) res = self.setGroupSize(1) if not res['OK']: return res return S_OK() def _checkBroadcastPlugin(self): gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % (', '.join(['SourceSE', 'TargetSE']))) requiredParams = ['SourceSE', 'TargetSE'] for requiredParam in requiredParams: if not self.paramValues.get(requiredParam): paramValue = raw_input("Please enter " + requiredParam + " ") setter = None setterName = "set%s" % requiredParam if hasattr(self, setterName) and callable( getattr(self, setterName)): setter = getattr(self, setterName) if not setter: return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." 
% setterName) ses = paramValue.replace(',', ' ').split() res = setter(ses) if not res['OK']: return res return S_OK() def __checkSEs(self, seList): res = gConfig.getSections('/Resources/StorageElements') if not res['OK']: return self._errorReport(res, 'Failed to get possible StorageElements') missing = set(seList) - set(res['Value']) if missing: for se in missing: gLogger.error("StorageElement %s is not known" % se) return S_ERROR("%d StorageElements not known" % len(missing)) return S_OK() def __promptForParameter(self, parameter, choices=[], default='', insert=True): res = promptUser("Please enter %s" % parameter, choices=choices, default=default) if not res['OK']: return self._errorReport(res) gLogger.notice("%s will be set to '%s'" % (parameter, res['Value'])) paramValue = res['Value'] if insert: setter = None setterName = "set%s" % parameter if hasattr(self, setterName) and callable(getattr( self, setterName)): setter = getattr(self, setterName) if not setter: return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" ) res = setter(paramValue) if not res['OK']: return res return S_OK(paramValue)
class TransformationCLI( cmd.Cmd, API ): def __init__( self ): self.server = TransformationClient() self.indentSpace = 4 cmd.Cmd.__init__( self ) API.__init__( self ) def printPair( self, key, value, separator = ":" ): valueList = value.split( "\n" ) print "%s%s%s %s" % ( key, " " * ( self.indentSpace - len( key ) ), separator, valueList[0].strip() ) for valueLine in valueList[ 1:-1 ]: print "%s %s" % ( " " * self.indentSpace, valueLine.strip() ) def do_exit( self, args ): """ Exits the shell. usage: exit """ sys.exit( 0 ) def do_quit( self, *args ): """ Exits the shell. Usage: quit """ sys.exit( 0 ) def do_help( self, args ): """ Default version of the help command Usage: help <command> OR use helpall to see description for all commands""" cmd.Cmd.do_help( self, args ) # overriting default help command def do_helpall( self, args ): """ Shows help information Usage: helpall <command> If no command is specified all commands are shown """ if len( args ) == 0: print "\nAvailable commands:\n" attrList = dir( self ) attrList.sort() for attribute in attrList: if attribute.find( "do_" ) == 0: self.printPair( attribute[ 3: ], getattr( self, attribute ).__doc__[ 1: ] ) print "" else: command = args.split()[0].strip() try: obj = getattr( self, "do_%s" % command ) except: print "There's no such %s command" % command return self.printPair( command, obj.__doc__[1:] ) def do_shell( self, args ): """Execute a shell command usage !<shell_command> """ comm = args res = shellCall( 0, comm ) if res['OK'] and res['Value'][0] == 0: _returnCode, stdOut, stdErr = res['Value'] print "%s\n%s" % ( stdOut, stdErr ) else: print res['Message'] def check_params( self, args, num ): """Checks if the number of parameters correct""" argss = args.split() length = len( argss ) if length < num: print "Error: Number of arguments provided %d less that required %d, please correct." % ( length, num ) return ( False, length ) return ( argss, length ) def check_id_or_name( self, id_or_name ): """resolve name or Id by converting type of argument """ if id_or_name.isdigit(): return long( id_or_name ) # its look like id return id_or_name #################################################################### # # These are the methods for transformation manipulation # def do_getall( self, args ): """Get transformation details usage: getall [Status] [Status] """ oTrans = Transformation() oTrans.getTransformations( transStatus = args.split(), printOutput = True ) def do_getAllByUser( self, args ): """Get all transformations created by a given user The first argument is the authorDN or username. The authorDN is preferred: it need to be inside quotes because contains white spaces. Only authorDN should be quoted. When the username is provided instead, the authorDN is retrieved from the uploaded proxy, so that the retrieved transformations are those created by the user who uploaded that proxy: that user could be different that the username provided to the function. 
usage: getAllByUser authorDN or username [Status] [Status] """ oTrans = Transformation() argss = args.split() username = "" author = "" status = [] if not len( argss ) > 0: print self.do_getAllByUser.__doc__ return # if the user didnt quoted the authorDN ends if '=' in argss[0] and argss[0][0] not in ["'", '"']: print "AuthorDN need to be quoted (just quote that argument)" return if argss[0][0] in ["'", '"']: # authorDN given author = argss[0] status_idx = 1 for arg in argss[1:]: author += ' ' + arg status_idx +=1 if arg[-1] in ["'", '"']: break # At this point we should have something like 'author' if not author[0] in ["'", '"'] or not author[-1] in ["'", '"']: print "AuthorDN need to be quoted (just quote that argument)" return else: author = author[1:-1] # throw away the quotes # the rest are the requested status status = argss[ status_idx: ] else: # username given username = argss[0] status = argss[ 1: ] oTrans.getTransformationsByUser( authorDN = author, userName = username, transStatus = status, printOutput = True ) def do_summaryTransformations( self, args ): """Show the summary for a list of Transformations Fields starting with 'F' ('J') refers to files (jobs). Proc. stand for processed. Usage: summaryTransformations <ProdID> [<ProdID> ...] """ argss = args.split() if not len( argss ) > 0: print self.do_summaryTransformations.__doc__ return transid = argss oTrans = Transformation() oTrans.getSummaryTransformations( transID = transid ) def do_getStatus( self, args ): """Get transformation details usage: getStatus <transName|ID> """ argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return for transName in argss: res = self.server.getTransformation( transName ) if not res['OK']: print "Getting status of %s failed: %s" % ( transName, res['Message'] ) else: print "%s: %s" % ( transName, res['Value']['Status'] ) def do_setStatus( self, args ): """Set transformation status usage: setStatus <Status> <transName|ID> Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'> """ argss = args.split() if not len( argss ) > 1: print "transformation and status not supplied" return status = argss[0] transNames = argss[1:] for transName in transNames: res = self.server.setTransformationParameter( transName, 'Status', status ) if not res['OK']: print "Setting status of %s failed: %s" % ( transName, res['Message'] ) else: print "%s set to %s" % ( transName, status ) def do_start( self, args ): """Start transformation usage: start <transName|ID> """ argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'Status', 'Active' ) if not res['OK']: print "Setting Status of %s failed: %s" % ( transName, res['Message'] ) else: res = self.server.setTransformationParameter( transName, 'AgentType', 'Automatic' ) if not res['OK']: print "Setting AgentType of %s failed: %s" % ( transName, res['Message'] ) else: print "%s started" % transName def do_stop( self, args ): """Stop transformation usage: stop <transID|ID> """ argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return if not len( argss ) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'AgentType', 'Manual' ) if not res['OK']: print "Stopping of %s failed: %s" % ( transName, res['Message'] ) else: print "%s stopped" % transName def do_flush( self, args ): """Flush transformation usage: flush <transName|ID> """ 
argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'Status', 'Flush' ) if not res['OK']: print "Flushing of %s failed: %s" % ( transName, res['Message'] ) else: print "%s flushing" % transName def do_get( self, args ): """Get transformation definition usage: get <transName|ID> """ argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformation( transName ) if not res['OK']: print "Failed to get %s: %s" % ( transName, res['Message'] ) else: res['Value'].pop( 'Body' ) printDict( res['Value'] ) def do_getBody( self, args ): """Get transformation body usage: getBody <transName|ID> """ argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformation( transName ) if not res['OK']: print "Failed to get %s: %s" % ( transName, res['Message'] ) else: print res['Value']['Body'] def do_getFileStat( self, args ): """Get transformation file statistics usage: getFileStat <transName|ID> """ argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformationStats( transName ) if not res['OK']: print "Failed to get statistics for %s: %s" % ( transName, res['Message'] ) else: res['Value'].pop( 'Total' ) printDict( res['Value'] ) def do_modMask( self, args ): """Modify transformation input definition usage: modInput <mask> <transName|ID> """ argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return mask = argss[0] transNames = argss[1:] for transName in transNames: res = self.server.setTransformationParameter( transName, "FileMask", mask ) if not res['OK']: print "Failed to modify input file mask for %s: %s" % ( transName, res['Message'] ) else: print "Updated %s filemask" % transName def do_getFiles( self, args ): """Get files for the transformation (optionally with a given status) usage: getFiles <transName|ID> [Status] [Status] """ argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return transName = argss[0] status = argss[1:] res = self.server.getTransformation( transName ) if not res['OK']: print "Failed to get transformation information: %s" % res['Message'] else: selectDict = {'TransformationID':res['Value']['TransformationID']} if status: selectDict['Status'] = status res = self.server.getTransformationFiles( condDict = selectDict ) if not res['OK']: print "Failed to get transformation files: %s" % res['Message'] elif res['Value']: self._printFormattedDictList( res['Value'], ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'], 'LFN', 'LFN' ) else: print "No files found" def do_getFileStatus( self, args ): """Get file(s) status for the given transformation usage: getFileStatus <transName|ID> <lfn> [<lfn>...] 
""" argss = args.split() if len( argss ) < 2: print "transformation and file not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.getTransformation( transName ) if not res['OK']: print "Failed to get transformation information: %s" % res['Message'] else: selectDict = {'TransformationID':res['Value']['TransformationID']} res = self.server.getTransformationFiles( condDict = selectDict ) if not res['OK']: print "Failed to get transformation files: %s" % res['Message'] elif res['Value']: filesList = [] for fileDict in res['Value']: if fileDict['LFN'] in lfns: filesList.append( fileDict ) if filesList: self._printFormattedDictList( filesList, ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'], 'LFN', 'LFN' ) else: print "Could not find any LFN in", lfns, "for transformation", transName else: print "No files found" def do_getOutputFiles( self, args ): """Get output files for the transformation usage: getOutputFiles <transName|ID> """ argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformation( transName ) if not res['OK']: print "Failed to get transformation information: %s" % res['Message'] else: fc = FileCatalog() meta = {} meta ['ProdID'] = transName res = fc.findFilesByMetadata( meta ) if not res['OK']: print res['Message'] return if not len( res['Value'] ) > 0: print 'No output files yet for transformation %d' %int(transName) return else: for lfn in res['Value']: print lfn def do_getInputDataQuery( self, args ): """Get input data query for the transformation usage: getInputDataQuery <transName|ID> """ argss = args.split() if not len( argss ) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformationInputDataQuery( transName ) if not res['OK']: print "Failed to get transformation input data query: %s" % res['Message'] else: print res['Value'] def do_setFileStatus( self, args ): """Set file status for the given transformation usage: setFileStatus <transName|ID> <lfn> <status> """ argss = args.split() if not len( argss ) == 3: print "transformation file and status not supplied" return transName = argss[0] lfn = argss[1] status = argss[2] res = self.server.setFileStatusForTransformation( transName, status, [lfn] ) if not res['OK']: print "Failed to update file status: %s" % res['Message'] else: print "Updated file status to %s" % status def do_resetFile( self, args ): """Reset file status for the given transformation usage: resetFile <transName|ID> <lfns> """ argss = args.split() if not len( argss ) > 1: print "transformation and file(s) not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns ) if not res['OK']: print "Failed to reset file status: %s" % res['Message'] else: if 'Failed' in res['Value']: print "Could not reset some files: " for lfn, reason in res['Value']['Failed'].items(): print lfn, reason else: print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns ) def do_resetProcessedFile( self, args ): """ Reset file status for the given transformation usage: resetFile <transName|ID> <lfn> """ argss = args.split() if not len( argss ) > 1: print "transformation and file(s) not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns, force = True ) if not res['OK']: print "Failed to reset file status: %s" % res['Message'] else: if res['Value']['Failed']: print "Could 
not reset some files: " for lfn, reason in res['Value']['Failed'].items(): print lfn, reason else: print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns ) #################################################################### # # These are the methods for file manipulation # def do_addDirectory( self, args ): """Add files from the given catalog directory usage: addDirectory <directory> [directory] """ argss = args.split() if not len( argss ) > 0: print "no directory supplied" return for directory in argss: res = self.server.addDirectory( directory, force = True ) if not res['OK']: print 'failed to add directory %s: %s' % ( directory, res['Message'] ) else: print 'added %s files for %s' % ( res['Value'], directory ) def do_replicas( self, args ): """ Get replicas for <path> usage: replicas <lfn> [lfn] """ argss = args.split() if not len( argss ) > 0: print "no files supplied" return res = self.server.getReplicas( argss ) if not res['OK']: print "failed to get any replica information: %s" % res['Message'] return for lfn in sorted( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to get replica information for %s: %s" % ( lfn, error ) for lfn in sorted( res['Value']['Successful'].keys() ): ses = sorted( res['Value']['Successful'][lfn].keys() ) outStr = "%s :" % lfn.ljust( 100 ) for se in ses: outStr = "%s %s" % ( outStr, se.ljust( 15 ) ) print outStr def do_addFile( self, args ): """Add new files to transformation DB usage: addFile <lfn> [lfn] """ argss = args.split() if not len( argss ) > 0: print "no files supplied" return lfnDict = {} for lfn in argss: lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':'IGNORED-SE', 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'} res = self.server.addFile( lfnDict, force = True ) if not res['OK']: print "failed to add any files: %s" % res['Message'] return for lfn in sorted( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to add %s: %s" % ( lfn, error ) for lfn in sorted( res['Value']['Successful'].keys() ): print "added %s" % lfn def do_removeFile( self, args ): """Remove file from transformation DB usage: removeFile <lfn> [lfn] """ argss = args.split() if not len( argss ) > 0: print "no files supplied" return res = self.server.removeFile( argss ) if not res['OK']: print "failed to remove any files: %s" % res['Message'] return for lfn in sorted( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to remove %s: %s" % ( lfn, error ) for lfn in sorted( res['Value']['Successful'].keys() ): print "removed %s" % lfn def do_addReplica( self, args ): """ Add new replica to the transformation DB usage: addReplica <lfn> <se> """ argss = args.split() if not len( argss ) == 2: print "no file info supplied" return lfn = argss[0] se = argss[1] lfnDict = {} lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'} res = self.server.addReplica( lfnDict, force = True ) if not res['OK']: print "failed to add replica: %s" % res['Message'] return for lfn in sorted( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to add replica: %s" % ( error ) for lfn in sorted( res['Value']['Successful'].keys() ): print "added %s" % lfn def do_removeReplica( self, args ): """Remove replica from the transformation DB usage: removeReplica <lfn> <se> """ argss = args.split() if not len( argss ) == 2: print "no file info supplied" return lfn = argss[0] se = argss[1] lfnDict = {} lfnDict[lfn] 
= {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'} res = self.server.removeReplica( lfnDict ) if not res['OK']: print "failed to remove replica: %s" % res['Message'] return for lfn in sorted( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to remove replica: %s" % ( error ) for lfn in sorted( res['Value']['Successful'].keys() ): print "removed %s" % lfn def do_setReplicaStatus( self, args ): """Set replica status, usually used to mark a replica Problematic usage: setReplicaStatus <lfn> <status> <se> """ argss = args.split() if not len( argss ) > 2: print "no file info supplied" return lfn = argss[0] status = argss[1] se = argss[2] lfnDict = {} lfnDict[lfn] = {'Status':status, 'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'} res = self.server.setReplicaStatus( lfnDict ) if not res['OK']: print "failed to set replica status: %s" % res['Message'] return for lfn in sorted( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to set replica status: %s" % ( error ) for lfn in sorted( res['Value']['Successful'].keys() ): print "updated replica status %s" % lfn
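# A short sketch (assuming a configured DIRAC client) of driving the TransformationCLI
# class defined above non-interactively: cmd.Cmd.onecmd() dispatches a single command
# line to the matching do_* handler, while cmdloop() starts the interactive shell.
# The transformation ID 1234 is a placeholder.
from DIRAC.Core.Base import Script
Script.parseCommandLine()

cli = TransformationCLI()
cli.onecmd("getStatus 1234")        # dispatched to do_getStatus("1234")
cli.onecmd("getFiles 1234 Unused")  # dispatched to do_getFiles("1234 Unused")
# cli.cmdloop()                     # uncomment for the interactive shell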
class TransformationCLI( cmd.Cmd, API ): def __init__( self ): self.server = TransformationClient() self.indentSpace = 4 cmd.Cmd.__init__( self ) API.__init__( self ) def printPair( self, key, value, separator = ":" ): valueList = value.split( "\n" ) print "%s%s%s %s" % ( key, " " * ( self.indentSpace - len( key ) ), separator, valueList[0].strip() ) for valueLine in valueList[ 1:-1 ]: print "%s %s" % ( " " * self.indentSpace, valueLine.strip() ) def do_exit( self, args ): """ Exits the shell. usage: exit """ sys.exit( 0 ) def do_quit( self, *args ): """ Exits the shell. Usage: quit """ sys.exit( 0 ) def do_help( self, args ): """ Default version of the help command Usage: help <command> OR use helpall to see description for all commans""" cmd.Cmd.do_help( self, args ) # overriting default help command def do_helpall( self, args ): """ Shows help information Usage: helpall <command> If no command is specified all commands are shown """ if len( args ) == 0: print "\nAvailable commands:\n" attrList = dir( self ) attrList.sort() for attribute in attrList: if attribute.find( "do_" ) == 0: self.printPair( attribute[ 3: ], getattr( self, attribute ).__doc__[ 1: ] ) print "" else: command = args.split()[0].strip() try: obj = getattr( self, "do_%s" % command ) except: print "There's no such %s command" % command return self.printPair( command, obj.__doc__[1:] ) def do_shell( self, args ): """Execute a shell command usage !<shell_command> """ comm = args res = shellCall( 0, comm ) if res['OK'] and res['Value'][0] == 0: returnCode, stdOut, stdErr = res['Value'] print "%s\n%s" % ( stdOut, stdErr ) else: print res['Message'] def check_params( self, args, num ): """Checks if the number of parameters correct""" argss = string.split( args ) length = len( argss ) if length < num: print "Error: Number of arguments provided %d less that required %d, please correct." 
% ( length, num ) return ( False, length ) return ( argss, length ) def check_id_or_name( self, id_or_name ): """resolve name or Id by converting type of argument """ if id_or_name.isdigit(): return long( id_or_name ) # its look like id return id_or_name def do_setServer( self, args ): """ Set the destination server usage: setServer serverURL """ argss = string.split( args ) if len( argss ) == 0: print "no server provided" self.serverURL = argss[0] self.server.setServer( self.serverURL ) #################################################################### # # These are the methods for transformation manipulation # def do_getall( self, args ): """Get transformation details usage: getall [Status] [Status] """ oTrans = Transformation() oTrans.setServer( self.serverURL ) oTrans.getTransformations( transStatus = string.split( args ), printOutput = True ) def do_getStatus( self, args ): """Get transformation details usage: getStatus <transName|ID> """ argss = string.split( args ) if not len( argss ) > 0: print "no transformation supplied" return for transName in argss: res = self.server.getTransformation( transName ) if not res['OK']: print "Getting status of %s failed: %s" % ( transName, res['Message'] ) else: print "%s: %s" % ( transName, res['Value']['Status'] ) def do_setStatus( self, args ): """Set transformation status usage: setStatus <Status> <transName|ID> Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'> """ argss = string.split( args ) if not len( argss ) > 1: print "transformation and status not supplied" return status = argss[0] transNames = argss[1:] for transName in transNames: res = self.server.setTransformationParameter( transName, 'Status', status ) if not res['OK']: print "Setting status of %s failed: %s" % ( transName, res['Message'] ) else: print "%s set to %s" % ( transName, status ) def do_start( self, args ): """Start transformation usage: start <transName|ID> """ argss = string.split( args ) if not len( argss ) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'Status', 'Active' ) if not res['OK']: print "Setting Status of %s failed: %s" % ( transName, res['Message'] ) else: res = self.server.setTransformationParameter( transName, 'AgentType', 'Automatic' ) if not res['OK']: print "Setting AgentType of %s failed: %s" % ( transName, res['Message'] ) else: print "%s started" % transName def do_stop( self, args ): """Stop transformation usage: stop <transID|ID> """ argss = string.split( args ) if not len( argss ) > 0: print "no transformation supplied" return if not len( argss ) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'AgentType', 'Manual' ) if not res['OK']: print "Stopping of %s failed: %s" % ( transName, res['Message'] ) else: print "%s stopped" % transName def do_flush( self, args ): """Flush transformation usage: flush <transName|ID> """ argss = string.split( args ) if not len( argss ) > 0: print "no transformation supplied" return for transName in argss: res = self.server.setTransformationParameter( transName, 'Status', 'Flush' ) if not res['OK']: print "Flushing of %s failed: %s" % ( transName, res['Message'] ) else: print "%s flushing" % transName def do_get( self, args ): """Get transformation definition usage: get <transName|ID> """ argss = string.split( args ) if not len( argss ) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformation( transName 
) if not res['OK']: print "Failed to get %s: %s" % ( transName, res['Message'] ) else: res['Value'].pop( 'Body' ) printDict( res['Value'] ) def do_getBody( self, args ): """Get transformation body usage: getBody <transName|ID> """ argss = string.split( args ) if not len( argss ) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformation( transName ) if not res['OK']: print "Failed to get %s: %s" % ( transName, res['Message'] ) else: print res['Value']['Body'] def do_getFileStat( self, args ): """Get transformation file statistics usage: getFileStat <transName|ID> """ argss = string.split( args ) if not len( argss ) > 0: print "no transformation supplied" return transName = argss[0] res = self.server.getTransformationStats( transName ) if not res['OK']: print "Failed to get statistics for %s: %s" % ( transName, res['Message'] ) else: res['Value'].pop( 'Total' ) printDict( res['Value'] ) def do_modMask( self, args ): """Modify transformation input definition usage: modInput <mask> <transName|ID> """ argss = string.split( args ) if not len( argss ) > 0: print "no transformation supplied" return mask = argss[0] transNames = argss[1:] for transName in transNames: res = self.server.setTransformationParameter( transName, "FileMask", mask ) if not res['OK']: print "Failed to modify input file mask for %s: %s" % ( transName, res['Message'] ) else: print "Updated %s filemask" % transName def do_getFiles( self, args ): """Get files for the transformation (optionally with a given status) usage: getFiles <transName|ID> [Status] [Status] """ argss = string.split( args ) if not len( argss ) > 0: print "no transformation supplied" return transName = argss[0] status = argss[1:] res = self.server.getTransformation( transName ) if not res['OK']: print "Failed to get transformation information: %s" % res['Message'] else: selectDict = {'TransformationID':res['Value']['TransformationID']} if status: selectDict['Status'] = status res = self.server.getTransformationFiles( condDict = selectDict ) if not res['OK']: print "Failed to get transformation files: %s" % res['Message'] elif res['Value']: self._printFormattedDictList( res['Value'], ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'], 'LFN', 'LFN' ) else: print "No files found" def do_getFileStatus( self, args ): """Get file(s) status for the given transformation usage: getFileStatus <transName|ID> <lfn> [<lfn>...] 
""" argss = string.split( args ) if len( argss ) < 2: print "transformation and file not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.getTransformation( transName ) if not res['OK']: print "Failed to get transformation information: %s" % res['Message'] else: selectDict = {'TransformationID':res['Value']['TransformationID']} res = self.server.getTransformationFiles( condDict = selectDict ) if not res['OK']: print "Failed to get transformation files: %s" % res['Message'] elif res['Value']: filesList = [] for fileDict in res['Value']: if fileDict['LFN'] in lfns: filesList.append( fileDict ) if filesList: self._printFormattedDictList( filesList, ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'], 'LFN', 'LFN' ) else: print "Could not find any LFN in", lfns, "for transformation", transName else: print "No files found" def do_setFileStatus( self, args ): """Set file status for the given transformation usage: setFileStatus <transName|ID> <lfn> <status> """ argss = string.split( args ) if not len( argss ) == 3: print "transformation file and status not supplied" return transName = argss[0] lfn = argss[1] status = argss[2] res = self.server.setFileStatusForTransformation( transName, status, [lfn] ) if not res['OK']: print "Failed to update file status: %s" % res['Message'] else: print "Updated file status to %s" % status def do_resetFile( self, args ): """Reset file status for the given transformation usage: resetFile <transName|ID> <lfn> """ argss = string.split( args ) if not len( argss ) > 1: print "transformation and file(s) not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns ) if not res['OK']: print "Failed to reset file status: %s" % res['Message'] else: if res['Value']['Failed']: print "Could not reset some files: " for lfn, reason in res['Value']['Failed'].items(): print lfn, reason else: print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns ) def do_resetProcessedFile( self, args ): """ Reset file status for the given transformation usage: resetFile <transName|ID> <lfn> """ argss = string.split( args ) if not len( argss ) > 1: print "transformation and file(s) not supplied" return transName = argss[0] lfns = argss[1:] res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns, force = True ) if not res['OK']: print "Failed to reset file status: %s" % res['Message'] else: if res['Value']['Failed']: print "Could not reset some files: " for lfn, reason in res['Value']['Failed'].items(): print lfn, reason else: print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns ) #################################################################### # # These are the methods for file manipulation # def do_addDirectory( self, args ): """Add files from the given catalog directory usage: addDirectory <directory> [directory] """ argss = string.split( args ) if not len( argss ) > 0: print "no directory supplied" return for directory in argss: res = self.server.addDirectory( directory, force = True ) if not res['OK']: print 'failed to add directory %s: %s' % ( directory, res['Message'] ) else: print 'added %s files for %s' % ( res['Value'], directory ) def do_replicas( self, args ): """ Get replicas for <path> usage: replicas <lfn> [lfn] """ argss = string.split( args ) if not len( argss ) > 0: print "no files supplied" return res = self.server.getReplicas( argss ) if not res['OK']: print "failed to get any replica information: %s" % res['Message'] 
return for lfn in sortList( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to get replica information for %s: %s" % ( lfn, error ) for lfn in sortList( res['Value']['Successful'].keys() ): ses = sortList( res['Value']['Successful'][lfn].keys() ) outStr = "%s :" % lfn.ljust( 100 ) for se in ses: outStr = "%s %s" % ( outStr, se.ljust( 15 ) ) print outStr def do_addFile( self, args ): """Add new files to transformation DB usage: addFile <lfn> [lfn] """ argss = string.split( args ) if not len( argss ) > 0: print "no files supplied" return lfnDict = {} for lfn in argss: lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':'IGNORED-SE', 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'} res = self.server.addFile( lfnDict, force = True ) if not res['OK']: print "failed to add any files: %s" % res['Message'] return for lfn in sortList( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to add %s: %s" % ( lfn, error ) for lfn in sortList( res['Value']['Successful'].keys() ): print "added %s" % lfn def do_removeFile( self, args ): """Remove file from transformation DB usage: removeFile <lfn> [lfn] """ argss = string.split( args ) if not len( argss ) > 0: print "no files supplied" return res = self.server.removeFile( argss ) if not res['OK']: print "failed to remove any files: %s" % res['Message'] return for lfn in sortList( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to remove %s: %s" % ( lfn, error ) for lfn in sortList( res['Value']['Successful'].keys() ): print "removed %s" % lfn def do_addReplica( self, args ): """ Add new replica to the transformation DB usage: addReplica <lfn> <se> """ argss = string.split( args ) if not len( argss ) == 2: print "no file info supplied" return lfn = argss[0] se = argss[1] lfnDict = {} lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'} res = self.server.addReplica( lfnDict, force = True ) if not res['OK']: print "failed to add replica: %s" % res['Message'] return for lfn in sortList( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to add replica: %s" % ( error ) for lfn in sortList( res['Value']['Successful'].keys() ): print "added %s" % lfn def do_removeReplica( self, args ): """Remove replica from the transformation DB usage: removeReplica <lfn> <se> """ argss = string.split( args ) if not len( argss ) == 2: print "no file info supplied" return lfn = argss[0] se = argss[1] lfnDict = {} lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'} res = self.server.removeReplica( lfnDict ) if not res['OK']: print "failed to remove replica: %s" % res['Message'] return for lfn in sortList( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to remove replica: %s" % ( error ) for lfn in sortList( res['Value']['Successful'].keys() ): print "removed %s" % lfn def do_setReplicaStatus( self, args ): """Set replica status, usually used to mark a replica Problematic usage: setReplicaStatus <lfn> <status> <se> """ argss = string.split( args ) if not len( argss ) > 2: print "no file info supplied" return lfn = argss[0] status = argss[1] se = argss[2] lfnDict = {} lfnDict[lfn] = {'Status':status, 'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'} res = self.server.setReplicaStatus( lfnDict ) if not res['OK']: print "failed to set replica status: %s" % 
res['Message'] return for lfn in sortList( res['Value']['Failed'].keys() ): error = res['Value']['Failed'][lfn] print "failed to set replica status for %s: %s" % ( lfn, error ) for lfn in sortList( res['Value']['Successful'].keys() ): print "updated replica status %s" % lfn
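# Sketch of the registration dictionary that the addFile/addReplica/setReplicaStatus
# commands above build before calling the TransformationClient: the catalogue-style
# metadata fields are deliberately filled with 'IGNORED-*' placeholders because only
# the LFN (and, for replica operations, the SE and Status) matter to the
# TransformationDB. The LFN and SE used here are hypothetical.
lfn = "/vo/prod/00001234/RAW/some_file.raw"   # hypothetical LFN
se = "FOO-SRM"                                # hypothetical storage element

lfnDict = {lfn: {'Status': 'Problematic',     # only used by setReplicaStatus
                 'PFN': 'IGNORED-PFN',
                 'SE': se,
                 'Size': 0,
                 'GUID': 'IGNORED-GUID',
                 'Checksum': 'IGNORED-CHECKSUM'}}
# e.g. self.server.setReplicaStatus( lfnDict ) or self.server.addReplica( lfnDict, force = True )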
class TransformationAgent( AgentModule ): def initialize( self ): self.pluginLocation = self.am_getOption( 'PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin' ) self.checkCatalog = self.am_getOption( 'CheckCatalog', 'yes' ) # This sets the Default Proxy to used as that defined under # /Operations/Shifter/ProductionManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption( 'shifterProxy', 'ProductionManager' ) self.transDB = TransformationClient( 'TransformationDB' ) self.rm = ReplicaManager() return S_OK() def execute( self ): # Get the transformations to process res = self.getTransformations() if not res['OK']: gLogger.info( "%s.execute: Failed to obtain transformations: %s" % ( AGENT_NAME, res['Message'] ) ) return S_OK() # Process the transformations for transDict in res['Value']: transID = long( transDict['TransformationID'] ) gLogger.info( "%s.execute: Processing transformation %s." % ( AGENT_NAME, transID ) ) startTime = time.time() res = self.processTransformation( transDict ) if not res['OK']: gLogger.info( "%s.execute: Failed to process transformation: %s" % ( AGENT_NAME, res['Message'] ) ) else: gLogger.info( "%s.execute: Processed transformation in %.1f seconds" % ( AGENT_NAME, time.time() - startTime ) ) return S_OK() def getTransformations( self ): # Obtain the transformations to be executed transName = self.am_getOption( 'Transformation', 'All' ) if transName == 'All': gLogger.info( "%s.getTransformations: Initializing general purpose agent." % AGENT_NAME ) res = self.transDB.getTransformations( {'Status':['Active', 'Completing', 'Flush']}, extraParams = True ) if not res['OK']: gLogger.error( "%s.getTransformations: Failed to get transformations." % AGENT_NAME, res['Message'] ) return res transformations = res['Value'] gLogger.info( "%s.getTransformations: Obtained %d transformations to process" % ( AGENT_NAME, len( transformations ) ) ) else: gLogger.info( "%s.getTransformations: Initializing for transformation %s." % ( AGENT_NAME, transName ) ) res = self.transDB.getTransformation( transName, extraParams = True ) if not res['OK']: gLogger.error( "%s.getTransformations: Failed to get transformation." % AGENT_NAME, res['Message'] ) return res transformations = [res['Value']] return S_OK( transformations ) def processTransformation( self, transDict ): transID = transDict['TransformationID'] # First get the LFNs associated to the transformation res = self.transDB.getTransformationFiles( condDict = {'TransformationID':transID, 'Status':'Unused'} ) if not res['OK']: gLogger.error( "%s.processTransformation: Failed to obtain input data." % AGENT_NAME, res['Message'] ) return res transFiles = res['Value'] lfns = res['LFNs'] if not lfns: gLogger.info( "%s.processTransformation: No 'Unused' files found for transformation." % AGENT_NAME ) if transDict['Status'] == 'Flush': res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' ) if not res['OK']: gLogger.error( "%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME, res['Message'] ) else: gLogger.info( "%s.execute: Updated transformation status to 'Active'." 
% AGENT_NAME ) return S_OK() # Check the data is available with replicas res = self.__getDataReplicas( transID, lfns, active = ( transDict['Type'].lower() not in ["replication", "removal"] ) ) if not res['OK']: gLogger.error( "%s.processTransformation: Failed to get data replicas" % AGENT_NAME, res['Message'] ) return res dataReplicas = res['Value'] # Get the plug-in type and create the plug-in object plugin = 'Standard' if transDict.has_key( 'Plugin' ) and transDict['Plugin']: plugin = transDict['Plugin'] gLogger.info( "%s.processTransformation: Processing transformation with '%s' plug-in." % ( AGENT_NAME, plugin ) ) res = self.__generatePluginObject( plugin ) if not res['OK']: return res oPlugin = res['Value'] # Get the plug-in and set the required params oPlugin.setParameters( transDict ) oPlugin.setInputData( dataReplicas ) oPlugin.setTransformationFiles( transFiles ) res = oPlugin.generateTasks() if not res['OK']: gLogger.error( "%s.processTransformation: Failed to generate tasks for transformation." % AGENT_NAME, res['Message'] ) return res tasks = res['Value'] # Create the tasks allCreated = True created = 0 for se, lfns in tasks: res = self.transDB.addTaskForTransformation( transID, lfns, se ) if not res['OK']: gLogger.error( "%s.processTransformation: Failed to add task generated by plug-in." % AGENT_NAME, res['Message'] ) allCreated = False else: created += 1 if created: gLogger.info( "%s.processTransformation: Successfully created %d tasks for transformation." % ( AGENT_NAME, created ) ) # If this production is to Flush if transDict['Status'] == 'Flush' and allCreated: res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' ) if not res['OK']: gLogger.error( "%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME, res['Message'] ) else: gLogger.info( "%s.execute: Updated transformation status to 'Active'." % AGENT_NAME ) return S_OK() ###################################################################### # # Internal methods used by the agent # def __generatePluginObject( self, plugin ): """ This simply instantiates the TransformationPlugin class with the relevant plugin name """ try: plugModule = __import__( self.pluginLocation, globals(), locals(), ['TransformationPlugin'] ) except Exception, x: gLogger.exception( "%s.__generatePluginObject: Failed to import 'TransformationPlugin'" % AGENT_NAME, '', x ) return S_ERROR() try: evalString = "plugModule.TransformationPlugin('%s')" % plugin return S_OK( eval( evalString ) ) except Exception, x: gLogger.exception( "%s.__generatePluginObject: Failed to create %s()." % ( AGENT_NAME, plugin ), '', x ) return S_ERROR()
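# A toy plugin illustrating the interface the TransformationAgent above relies on: the
# agent only calls setParameters(), setInputData(), setTransformationFiles() and
# generateTasks(), instantiates the class with the plugin name, and expects
# generateTasks() to return S_OK() with a list of (targetSE, lfns) tuples that it feeds
# to addTaskForTransformation(). This is a sketch under those assumptions, not the real
# TransformationPlugin; 'ANY-SE' is a placeholder target.
from DIRAC import S_OK

class ToyGroupingPlugin(object):
    """Groups the available LFNs into fixed-size tasks for a single target SE."""

    def __init__(self, plugin='Standard'):
        # mirrors the constructor signature used by __generatePluginObject
        self.plugin = plugin
        self.params = {}
        self.dataReplicas = {}
        self.transFiles = []

    def setParameters(self, params):
        self.params = params              # the transformation dictionary

    def setInputData(self, dataReplicas):
        self.dataReplicas = dataReplicas  # {lfn: {se: pfn, ...}}

    def setTransformationFiles(self, transFiles):
        self.transFiles = transFiles      # the 'Unused' file records

    def generateTasks(self):
        groupSize = int(self.params.get('GroupSize', 1))
        lfns = sorted(self.dataReplicas)
        tasks = []
        for i in range(0, len(lfns), groupSize):
            # real plugins choose the target SE from the replica information
            tasks.append(('ANY-SE', lfns[i:i + groupSize]))
        return S_OK(tasks)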