Exemplo n.º 1
0
def _extend():
    """Extends all the tasks"""
    clip = _Params()
    clip.registerSwitches()
    Script.parseCommandLine()

    from DIRAC import gLogger, exit as dexit

    if not clip.prod or not clip.tasks:
        gLogger.error("Production ID is 0 or Tasks is 0, cannot be")
        dexit(1)

    from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    tc = TransformationClient()
    res = tc.getTransformation(clip.prod)
    trans = res['Value']
    transp = trans['Plugin']
    if transp != 'Limited':
        gLogger.error(
            "This cannot be used on productions that are not using the 'Limited' plugin"
        )
        dexit(0)

    gLogger.info("Prod %s has %s tasks registered" %
                 (clip.prod, trans['MaxNumberOfTasks']))
    if clip.tasks > 0:
        max_tasks = trans['MaxNumberOfTasks'] + clip.tasks
        groupsize = trans['GroupSize']
        gLogger.notice("Adding %s tasks (%s file(s)) to production %s" %
                       (clip.tasks, clip.tasks * groupsize, clip.prod))
    elif clip.tasks < 0:
        max_tasks = -1
        gLogger.notice(
            "Now all existing files in the DB for production %s will be processed."
            % clip.prod)
    else:
        gLogger.error("Number of tasks must be different from 0")
        dexit(1)
    res = tc.setTransformationParameter(clip.prod, 'MaxNumberOfTasks',
                                        max_tasks)
    if not res['OK']:
        gLogger.error(res['Message'])
        dexit(1)
    gLogger.notice("Production %s extended!" % clip.prod)

    dexit(0)
def _extend():
  """Extends all the tasks"""
  clip = _Params()
  clip.registerSwitches()
  Script.parseCommandLine()
  
  from DIRAC import gLogger, exit as dexit
  
  if not clip.prod or not clip.tasks:
    gLogger.error("Production ID is 0 or Tasks is 0, cannot be")
    dexit(1)
    
  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
  tc = TransformationClient()
  res = tc.getTransformation(clip.prod)
  trans= res['Value']
  transp = trans['Plugin']
  if transp != 'Limited':
    gLogger.error("This cannot be used on productions that are not using the 'Limited' plugin")
    dexit(0)
  
  gLogger.info("Prod %s has %s tasks registered" % (clip.prod, trans['MaxNumberOfTasks']) )
  if clip.tasks >0:
    max_tasks = trans['MaxNumberOfTasks'] + clip.tasks  
    groupsize = trans['GroupSize']
    gLogger.notice("Adding %s tasks (%s file(s)) to production %s" %(clip.tasks, clip.tasks*groupsize, clip.prod))
  elif clip.tasks <0:
    max_tasks = -1
    gLogger.notice("Now all existing files in the DB for production %s will be processed." % clip.prod)
  else:
    gLogger.error("Number of tasks must be different from 0")
    dexit(1)
  res = tc.setTransformationParameter(clip.prod, 'MaxNumberOfTasks', max_tasks)
  if not res['OK']:
    gLogger.error(res['Message'])
    dexit(1)
  gLogger.notice("Production %s extended!" % clip.prod)
    
  dexit(0)
Exemplo n.º 3
0
class ValidateOutputDataAgent(AgentModule):
    def __init__(self, *args, **kwargs):
        """ c'tor
    """
        AgentModule.__init__(self, *args, **kwargs)

        self.consistencyInspector = ConsistencyInspector()
        self.integrityClient = DataIntegrityClient()
        self.fc = FileCatalog()
        self.transClient = TransformationClient()
        self.fileCatalogClient = FileCatalogClient()

        agentTSTypes = self.am_getOption('TransformationTypes', [])
        if agentTSTypes:
            self.transformationTypes = agentTSTypes
        else:
            self.transformationTypes = Operations().getValue(
                'Transformations/DataProcessing', ['MCSimulation', 'Merge'])

        self.directoryLocations = sorted(
            self.am_getOption('DirectoryLocations',
                              ['TransformationDB', 'MetadataCatalog']))
        self.transfidmeta = self.am_getOption('TransfIDMeta',
                                              "TransformationID")
        self.enableFlag = True

    #############################################################################

    def initialize(self):
        """ Sets defaults
    """

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        gLogger.info("Will treat the following transformation types: %s" %
                     str(self.transformationTypes))
        gLogger.info(
            "Will search for directories in the following locations: %s" %
            str(self.directoryLocations))
        gLogger.info("Will use %s as metadata tag name for TransformationID" %
                     self.transfidmeta)
        return S_OK()

    #############################################################################

    def execute(self):
        """ The VerifyOutputData execution method
    """
        self.enableFlag = self.am_getOption('EnableFlag', 'True')
        if not self.enableFlag == 'True':
            self.log.info(
                "VerifyOutputData is disabled by configuration option 'EnableFlag'"
            )
            return S_OK('Disabled via CS flag')

        gLogger.info("-" * 40)
        self.updateWaitingIntegrity()
        gLogger.info("-" * 40)

        res = self.transClient.getTransformations({
            'Status':
            'ValidatingOutput',
            'Type':
            self.transformationTypes
        })
        if not res['OK']:
            gLogger.error("Failed to get ValidatingOutput transformations",
                          res['Message'])
            return res
        transDicts = res['Value']
        if not transDicts:
            gLogger.info("No transformations found in ValidatingOutput status")
            return S_OK()
        gLogger.info("Found %s transformations in ValidatingOutput status" %
                     len(transDicts))
        for transDict in transDicts:
            transID = transDict['TransformationID']
            res = self.checkTransformationIntegrity(int(transID))
            if not res['OK']:
                gLogger.error(
                    "Failed to perform full integrity check for transformation %d"
                    % transID)
            else:
                self.finalizeCheck(transID)
                gLogger.info("-" * 40)
        return S_OK()

    def updateWaitingIntegrity(self):
        """ Get 'WaitingIntegrity' transformations, update to 'ValidatedOutput'
    """
        gLogger.info(
            "Looking for transformations in the WaitingIntegrity status to update"
        )
        res = self.transClient.getTransformations(
            {'Status': 'WaitingIntegrity'})
        if not res['OK']:
            gLogger.error("Failed to get WaitingIntegrity transformations",
                          res['Message'])
            return res
        transDicts = res['Value']
        if not transDicts:
            gLogger.info("No transformations found in WaitingIntegrity status")
            return S_OK()
        gLogger.info("Found %s transformations in WaitingIntegrity status" %
                     len(transDicts))
        for transDict in transDicts:
            transID = transDict['TransformationID']
            gLogger.info("-" * 40)
            res = self.integrityClient.getTransformationProblematics(
                int(transID))
            if not res['OK']:
                gLogger.error(
                    "Failed to determine waiting problematics for transformation",
                    res['Message'])
            elif not res['Value']:
                res = self.transClient.setTransformationParameter(
                    transID, 'Status', 'ValidatedOutput')
                if not res['OK']:
                    gLogger.error(
                        "Failed to update status of transformation %s to ValidatedOutput"
                        % (transID))
                else:
                    gLogger.info(
                        "Updated status of transformation %s to ValidatedOutput"
                        % (transID))
            else:
                gLogger.info(
                    "%d problematic files for transformation %s were found" %
                    (len(res['Value']), transID))
        return

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """ Get the directories for the supplied transformation from the transformation system
    """
        directories = []
        if 'TransformationDB' in self.directoryLocations:
            res = self.transClient.getTransformationParameters(
                transID, ['OutputDirectories'])
            if not res['OK']:
                gLogger.error("Failed to obtain transformation directories",
                              res['Message'])
                return res
            if not isinstance(res['Value'], list):
                transDirectories = ast.literal_eval(res['Value'])
            else:
                transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if 'MetadataCatalog' in self.directoryLocations:
            res = self.fileCatalogClient.findDirectoriesByMetadata(
                {self.transfidmeta: transID})
            if not res['OK']:
                gLogger.error("Failed to obtain metadata catalog directories",
                              res['Message'])
                return res
            transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)
        if not directories:
            gLogger.info("No output directories found")
        directories = sorted(directories)
        return S_OK(directories)

    @staticmethod
    def _addDirs(transID, newDirs, existingDirs):
        for nDir in newDirs:
            transStr = str(transID).zfill(8)
            if re.search(transStr, nDir):
                if nDir not in existingDirs:
                    existingDirs.append(nDir)
        return existingDirs

    #############################################################################
    def checkTransformationIntegrity(self, transID):
        """ This method contains the real work
    """
        gLogger.info("-" * 40)
        gLogger.info("Checking the integrity of transformation %s" % transID)
        gLogger.info("-" * 40)

        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            return res
        directories = res['Value']
        if not directories:
            return S_OK()

        ######################################################
        #
        # This check performs Catalog->SE for possible output directories
        #
        res = self.fc.exists(directories)
        if not res['OK']:
            gLogger.error('Failed to check directory existence',
                          res['Message'])
            return res
        for directory, error in res['Value']['Failed']:
            gLogger.error('Failed to determine existance of directory',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR("Failed to determine the existance of directories")
        directoryExists = res['Value']['Successful']
        for directory in sorted(directoryExists.keys()):
            if not directoryExists[directory]:
                continue
            iRes = self.consistencyInspector.catalogDirectoryToSE(directory)
            if not iRes['OK']:
                gLogger.error(iRes['Message'])
                return iRes

        gLogger.info("-" * 40)
        gLogger.info("Completed integrity check for transformation %s" %
                     transID)
        return S_OK()

    def finalizeCheck(self, transID):
        """ Move to 'WaitingIntegrity' or 'ValidatedOutput'
    """
        res = self.integrityClient.getTransformationProblematics(int(transID))

        if not res['OK']:
            gLogger.error(
                "Failed to determine whether there were associated problematic files",
                res['Message'])
            newStatus = ''
        elif res['Value']:
            gLogger.info(
                "%d problematic files for transformation %s were found" %
                (len(res['Value']), transID))
            newStatus = "WaitingIntegrity"
        else:
            gLogger.info("No problematics were found for transformation %s" %
                         transID)
            newStatus = "ValidatedOutput"
        if newStatus:
            res = self.transClient.setTransformationParameter(
                transID, 'Status', newStatus)
            if not res['OK']:
                gLogger.error(
                    "Failed to update status of transformation %s to %s" %
                    (transID, newStatus))
            else:
                gLogger.info("Updated status of transformation %s to %s" %
                             (transID, newStatus))
        gLogger.info("-" * 40)
        return S_OK()
Exemplo n.º 4
0
class TransformationCleaningAgent(AgentModule):
    """
  .. class:: TransformationCleaningAgent

  :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance
  :param ~TransformationClient.TransformationClient transClient: TransformationClient instance
  :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance

  """
    def __init__(self, *args, **kwargs):
        """ c'tor
    """
        AgentModule.__init__(self, *args, **kwargs)

        self.shifterProxy = None

        # # transformation client
        self.transClient = None
        # # wms client
        self.wmsClient = None
        # # request client
        self.reqClient = None
        # # file catalog client
        self.metadataClient = None

        # # transformations types
        self.transformationTypes = None
        # # directory locations
        self.directoryLocations = ['TransformationDB', 'MetadataCatalog']
        # # transformation metadata
        self.transfidmeta = 'TransformationID'
        # # archive periof in days
        self.archiveAfter = 7
        # # transformation log SEs
        self.logSE = 'LogSE'
        # # enable/disable execution
        self.enableFlag = 'True'

        self.dataProcTTypes = ['MCSimulation', 'Merge']
        self.dataManipTTypes = ['Replication', 'Removal']

    def initialize(self):
        """ agent initialisation

    reading and setting confing opts

    :param self: self reference
    """
        # # shifter proxy
        # See cleanContent method: this proxy will be used ALSO when the file catalog used
        # is the DIRAC File Catalog (DFC).
        # This is possible because of unset of the "UseServerCertificate" option
        self.shifterProxy = self.am_getOption('shifterProxy',
                                              self.shifterProxy)

        # # transformations types
        self.dataProcTTypes = Operations().getValue(
            'Transformations/DataProcessing', self.dataProcTTypes)
        self.dataManipTTypes = Operations().getValue(
            'Transformations/DataManipulation', self.dataManipTTypes)
        agentTSTypes = self.am_getOption('TransformationTypes', [])
        if agentTSTypes:
            self.transformationTypes = sorted(agentTSTypes)
        else:
            self.transformationTypes = sorted(self.dataProcTTypes +
                                              self.dataManipTTypes)
        self.log.info("Will consider the following transformation types: %s" %
                      str(self.transformationTypes))
        # # directory locations
        self.directoryLocations = sorted(
            self.am_getOption('DirectoryLocations', self.directoryLocations))
        self.log.info(
            "Will search for directories in the following locations: %s" %
            str(self.directoryLocations))
        # # transformation metadata
        self.transfidmeta = self.am_getOption('TransfIDMeta',
                                              self.transfidmeta)
        self.log.info("Will use %s as metadata tag name for TransformationID" %
                      self.transfidmeta)
        # # archive periof in days
        self.archiveAfter = self.am_getOption('ArchiveAfter',
                                              self.archiveAfter)  # days
        self.log.info("Will archive Completed transformations after %d days" %
                      self.archiveAfter)
        # # transformation log SEs
        self.logSE = Operations().getValue('/LogStorage/LogSE', self.logSE)
        self.log.info("Will remove logs found on storage element: %s" %
                      self.logSE)

        # # transformation client
        self.transClient = TransformationClient()
        # # wms client
        self.wmsClient = WMSClient()
        # # request client
        self.reqClient = ReqClient()
        # # file catalog client
        self.metadataClient = FileCatalogClient()

        return S_OK()

    #############################################################################
    def execute(self):
        """ execution in one agent's cycle

    :param self: self reference
    """

        self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag)
        if self.enableFlag != 'True':
            self.log.info(
                'TransformationCleaningAgent is disabled by configuration option EnableFlag'
            )
            return S_OK('Disabled via CS flag')

        # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
        res = self.transClient.getTransformations({
            'Status':
            'Cleaning',
            'Type':
            self.transformationTypes
        })
        if res['OK']:
            for transDict in res['Value']:
                if self.shifterProxy:
                    self._executeClean(transDict)
                else:
                    self.log.info(
                        "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeClean)(
                        transDict,
                        proxyUserDN=transDict['AuthorDN'],
                        proxyUserGroup=transDict['AuthorGroup'])
        else:
            self.log.error("Failed to get transformations", res['Message'])

        # Obtain the transformations in RemovingFiles status and removes the output files
        res = self.transClient.getTransformations({
            'Status':
            'RemovingFiles',
            'Type':
            self.transformationTypes
        })
        if res['OK']:
            for transDict in res['Value']:
                if self.shifterProxy:
                    self._executeRemoval(transDict)
                else:
                    self.log.info(
                        "Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeRemoval)(
                        transDict,
                        proxyUserDN=transDict['AuthorDN'],
                        proxyUserGroup=transDict['AuthorGroup'])
        else:
            self.log.error("Could not get the transformations", res['Message'])

        # Obtain the transformations in Completed status and archive if inactive for X days
        olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
        res = self.transClient.getTransformations(
            {
                'Status': 'Completed',
                'Type': self.transformationTypes
            },
            older=olderThanTime,
            timeStamp='LastUpdate')
        if res['OK']:
            for transDict in res['Value']:
                if self.shifterProxy:
                    self._executeArchive(transDict)
                else:
                    self.log.info(
                        "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeArchive)(
                        transDict,
                        proxyUserDN=transDict['AuthorDN'],
                        proxyUserGroup=transDict['AuthorGroup'])
        else:
            self.log.error("Could not get the transformations", res['Message'])
        return S_OK()

    def _executeClean(self, transDict):
        """Clean transformation."""
        # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # We just archive
        if transDict['Type'] in self.dataManipTTypes:
            res = self.archiveTransformation(transDict['TransformationID'])
            if not res['OK']:
                self.log.error("Problems archiving transformation %s: %s" %
                               (transDict['TransformationID'], res['Message']))
        else:
            res = self.cleanTransformation(transDict['TransformationID'])
            if not res['OK']:
                self.log.error("Problems cleaning transformation %s: %s" %
                               (transDict['TransformationID'], res['Message']))

    def _executeRemoval(self, transDict):
        """Remove files from given transformation."""
        res = self.removeTransformationOutput(transDict['TransformationID'])
        if not res['OK']:
            self.log.error("Problems removing transformation %s: %s" %
                           (transDict['TransformationID'], res['Message']))

    def _executeArchive(self, transDict):
        """Archive the given transformation."""
        res = self.archiveTransformation(transDict['TransformationID'])
        if not res['OK']:
            self.log.error("Problems archiving transformation %s: %s" %
                           (transDict['TransformationID'], res['Message']))

        return S_OK()

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """ get the directories for the supplied transformation from the transformation system.
        These directories are used by removeTransformationOutput and cleanTransformation for removing output.

    :param self: self reference
    :param int transID: transformation ID
    """
        self.log.verbose(
            "Cleaning Transformation directories of transformation %d" %
            transID)
        directories = []
        if 'TransformationDB' in self.directoryLocations:
            res = self.transClient.getTransformationParameters(
                transID, ['OutputDirectories'])
            if not res['OK']:
                self.log.error("Failed to obtain transformation directories",
                               res['Message'])
                return res
            transDirectories = []
            if res['Value']:
                if not isinstance(res['Value'], list):
                    try:
                        transDirectories = ast.literal_eval(res['Value'])
                    except BaseException:
                        # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]'
                        transDirectories.append(res['Value'])
                else:
                    transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if 'MetadataCatalog' in self.directoryLocations:
            res = self.metadataClient.findDirectoriesByMetadata(
                {self.transfidmeta: transID})
            if not res['OK']:
                self.log.error("Failed to obtain metadata catalog directories",
                               res['Message'])
                return res
            transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if not directories:
            self.log.info("No output directories found")
        directories = sorted(directories)
        return S_OK(directories)

    @classmethod
    def _addDirs(cls, transID, newDirs, existingDirs):
        """ append unique :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    """
        for folder in newDirs:
            transStr = str(transID).zfill(8)
            if re.search(transStr, str(folder)):
                if folder not in existingDirs:
                    existingDirs.append(os.path.normpath(folder))
        return existingDirs

    #############################################################################
    #
    # These are the methods for performing the cleaning of catalogs and storage
    #

    def cleanContent(self, directory):
        """ wipe out everything from catalog under folder :directory:

    :param self: self reference
    :params str directory: folder name
    """
        self.log.verbose("Cleaning Catalog contents")
        res = self.__getCatalogDirectoryContents([directory])
        if not res['OK']:
            return res
        filesFound = res['Value']
        if not filesFound:
            self.log.info(
                "No files are registered in the catalog directory %s" %
                directory)
            return S_OK()
        self.log.info(
            "Attempting to remove %d possible remnants from the catalog and storage"
            % len(filesFound))

        # Executing with shifter proxy
        gConfigurationData.setOptionInCFG(
            '/DIRAC/Security/UseServerCertificate', 'false')
        res = DataManager().removeFile(filesFound, force=True)
        gConfigurationData.setOptionInCFG(
            '/DIRAC/Security/UseServerCertificate', 'true')

        if not res['OK']:
            return res
        realFailure = False
        for lfn, reason in res['Value']['Failed'].items():
            if "File does not exist" in str(reason):
                self.log.warn("File %s not found in some catalog: " % (lfn))
            else:
                self.log.error("Failed to remove file found in the catalog",
                               "%s %s" % (lfn, reason))
                realFailure = True
        if realFailure:
            return S_ERROR("Failed to remove all files found in the catalog")
        return S_OK()

    def __getCatalogDirectoryContents(self, directories):
        """ get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    """
        self.log.info('Obtaining the catalog contents for %d directories:' %
                      len(directories))
        for directory in directories:
            self.log.info(directory)
        activeDirs = directories
        allFiles = {}
        fc = FileCatalog()
        while activeDirs:
            currentDir = activeDirs[0]
            res = returnSingleResult(fc.listDirectory(currentDir))
            activeDirs.remove(currentDir)
            if not res['OK'] and 'Directory does not exist' in res[
                    'Message']:  # FIXME: DFC should return errno
                self.log.info("The supplied directory %s does not exist" %
                              currentDir)
            elif not res['OK']:
                if "No such file or directory" in res['Message']:
                    self.log.info("%s: %s" % (currentDir, res['Message']))
                else:
                    self.log.error("Failed to get directory %s content: %s" %
                                   (currentDir, res['Message']))
            else:
                dirContents = res['Value']
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])
        self.log.info("Found %d files" % len(allFiles))
        return S_OK(allFiles.keys())

    def cleanTransformationLogFiles(self, directory):
        """ clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    """
        self.log.verbose("Removing log files found in the directory %s" %
                         directory)
        res = returnSingleResult(
            StorageElement(self.logSE).removeDirectory(directory,
                                                       recursive=True))
        if not res['OK']:
            if cmpError(res, errno.ENOENT):  # No such file or directory
                self.log.warn("Transformation log directory does not exist",
                              directory)
                return S_OK()
            self.log.error("Failed to remove log files", res['Message'])
            return res
        self.log.info("Successfully removed transformation log directory")
        return S_OK()

    #############################################################################
    #
    # These are the functional methods for archiving and cleaning transformations
    #

    def removeTransformationOutput(self, transID):
        """ This just removes any mention of the output data from the catalog and storage """
        self.log.info("Removing output data for transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            self.log.error(
                'Problem obtaining directories for transformation %s with result "%s"'
                % (transID, res))
            return S_OK()
        directories = res['Value']
        for directory in directories:
            if not re.search('/LOG/', directory):
                res = self.cleanContent(directory)
                if not res['OK']:
                    return res

        self.log.info("Removed %d directories from the catalog \
      and its files from the storage for transformation %s" %
                      (len(directories), transID))
        # Clean ALL the possible remnants found in the metadata catalog
        res = self.cleanMetadataCatalogFiles(transID)
        if not res['OK']:
            return res
        self.log.info("Successfully removed output of transformation %d" %
                      transID)
        # Change the status of the transformation to RemovedFiles
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'RemovedFiles')
        if not res['OK']:
            self.log.error(
                "Failed to update status of transformation %s to RemovedFiles"
                % (transID), res['Message'])
            return res
        self.log.info("Updated status of transformation %s to RemovedFiles" %
                      (transID))
        return S_OK()

    def archiveTransformation(self, transID):
        """ This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    """
        self.log.info("Archiving transformation %s" % transID)
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res['OK']:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res['OK']:
            return res
        self.log.info("Successfully archived transformation %d" % transID)
        # Change the status of the transformation to archived
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'Archived')
        if not res['OK']:
            self.log.error(
                "Failed to update status of transformation %s to Archived" %
                (transID), res['Message'])
            return res
        self.log.info("Updated status of transformation %s to Archived" %
                      (transID))
        return S_OK()

    def cleanTransformation(self, transID):
        """ This removes what was produced by the supplied transformation,
        leaving only some info and log in the transformation DB.
    """
        self.log.info("Cleaning transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            self.log.error(
                'Problem obtaining directories for transformation %s with result "%s"'
                % (transID, res))
            return S_OK()
        directories = res['Value']
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res['OK']:
            return res
        # Clean the log files for the jobs
        for directory in directories:
            if re.search('/LOG/', directory):
                res = self.cleanTransformationLogFiles(directory)
                if not res['OK']:
                    return res
            res = self.cleanContent(directory)
            if not res['OK']:
                return res

        # Clean ALL the possible remnants found
        res = self.cleanMetadataCatalogFiles(transID)
        if not res['OK']:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res['OK']:
            return res
        self.log.info("Successfully cleaned transformation %d" % transID)
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'Cleaned')
        if not res['OK']:
            self.log.error(
                "Failed to update status of transformation %s to Cleaned" %
                (transID), res['Message'])
            return res
        self.log.info("Updated status of transformation %s to Cleaned" %
                      (transID))
        return S_OK()

    def cleanMetadataCatalogFiles(self, transID):
        """ wipe out files from catalog """
        res = self.metadataClient.findFilesByMetadata(
            {self.transfidmeta: transID})
        if not res['OK']:
            return res
        fileToRemove = res['Value']
        if not fileToRemove:
            self.log.info('No files found for transID %s' % transID)
            return S_OK()

        # Executing with shifter proxy
        gConfigurationData.setOptionInCFG(
            '/DIRAC/Security/UseServerCertificate', 'false')
        res = DataManager().removeFile(fileToRemove, force=True)
        gConfigurationData.setOptionInCFG(
            '/DIRAC/Security/UseServerCertificate', 'true')

        if not res['OK']:
            return res
        for lfn, reason in res['Value']['Failed'].items():
            self.log.error("Failed to remove file found in metadata catalog",
                           "%s %s" % (lfn, reason))
        if res['Value']['Failed']:
            return S_ERROR(
                "Failed to remove all files found in the metadata catalog")
        self.log.info("Successfully removed all files found in the BK")
        return S_OK()

    #############################################################################
    #
    # These are the methods for removing the jobs from the WMS and transformation DB
    #

    def cleanTransformationTasks(self, transID):
        """ clean tasks from WMS, or from the RMS if it is a DataManipulation transformation
    """
        self.log.verbose("Cleaning Transformation tasks of transformation %d" %
                         transID)
        res = self.__getTransformationExternalIDs(transID)
        if not res['OK']:
            return res
        externalIDs = res['Value']
        if externalIDs:
            res = self.transClient.getTransformationParameters(
                transID, ['Type'])
            if not res['OK']:
                self.log.error("Failed to determine transformation type")
                return res
            transType = res['Value']
            if transType in self.dataProcTTypes:
                res = self.__removeWMSTasks(externalIDs)
            else:
                res = self.__removeRequests(externalIDs)
            if not res['OK']:
                return res
        return S_OK()

    def __getTransformationExternalIDs(self, transID):
        """ collect all ExternalIDs for transformation :transID:

    :param self: self reference
    :param int transID: transforamtion ID
    """
        res = self.transClient.getTransformationTasks(
            condDict={'TransformationID': transID})
        if not res['OK']:
            self.log.error(
                "Failed to get externalIDs for transformation %d" % transID,
                res['Message'])
            return res
        externalIDs = [taskDict['ExternalID'] for taskDict in res["Value"]]
        self.log.info("Found %d tasks for transformation" % len(externalIDs))
        return S_OK(externalIDs)

    def __removeRequests(self, requestIDs):
        """ This will remove requests from the RMS system -
    """
        rIDs = [int(long(j)) for j in requestIDs if long(j)]
        for reqID in rIDs:
            self.reqClient.cancelRequest(reqID)

        return S_OK()

    def __removeWMSTasks(self, transJobIDs):
        """ wipe out jobs and their requests from the system

    :param self: self reference
    :param list trasnJobIDs: job IDs
    """
        # Prevent 0 job IDs
        jobIDs = [int(j) for j in transJobIDs if int(j)]
        allRemove = True
        for jobList in breakListIntoChunks(jobIDs, 500):

            res = self.wmsClient.killJob(jobList)
            if res['OK']:
                self.log.info("Successfully killed %d jobs from WMS" %
                              len(jobList))
            elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs"
                                               not in res) and ("FailedJobIDs"
                                                                not in res):
                self.log.info("Found %s jobs which did not exist in the WMS" %
                              len(res['InvalidJobIDs']))
            elif "NonauthorizedJobIDs" in res:
                self.log.error(
                    "Failed to kill %s jobs because not authorized" %
                    len(res['NonauthorizedJobIDs']))
                allRemove = False
            elif "FailedJobIDs" in res:
                self.log.error("Failed to kill %s jobs" %
                               len(res['FailedJobIDs']))
                allRemove = False

            res = self.wmsClient.deleteJob(jobList)
            if res['OK']:
                self.log.info("Successfully removed %d jobs from WMS" %
                              len(jobList))
            elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs"
                                               not in res) and ("FailedJobIDs"
                                                                not in res):
                self.log.info("Found %s jobs which did not exist in the WMS" %
                              len(res['InvalidJobIDs']))
            elif "NonauthorizedJobIDs" in res:
                self.log.error(
                    "Failed to remove %s jobs because not authorized" %
                    len(res['NonauthorizedJobIDs']))
                allRemove = False
            elif "FailedJobIDs" in res:
                self.log.error("Failed to remove %s jobs" %
                               len(res['FailedJobIDs']))
                allRemove = False

        if not allRemove:
            return S_ERROR("Failed to remove all remnants from WMS")
        self.log.info("Successfully removed all tasks from the WMS")

        if not jobIDs:
            self.log.info(
                "JobIDs not present, unable to remove asociated requests.")
            return S_OK()

        failed = 0
        failoverRequests = {}
        res = self.reqClient.getRequestIDsForJobs(jobIDs)
        if not res['OK']:
            self.log.error("Failed to get requestID for jobs.", res['Message'])
            return res
        failoverRequests.update(res['Value']['Successful'])
        if not failoverRequests:
            return S_OK()
        for jobID, requestID in res['Value']['Successful'].items():
            # Put this check just in case, tasks must have associated jobs
            if jobID == 0 or jobID == '0':
                continue
            res = self.reqClient.cancelRequest(requestID)
            if not res['OK']:
                self.log.error("Failed to remove request from RequestDB",
                               res['Message'])
                failed += 1
            else:
                self.log.verbose("Removed request %s associated to job %d." %
                                 (requestID, jobID))

        if failed:
            self.log.info("Successfully removed %s requests" %
                          (len(failoverRequests) - failed))
            self.log.info("Failed to remove %s requests" % failed)
            return S_ERROR("Failed to remove all the request from RequestDB")
        self.log.info(
            "Successfully removed all the associated failover requests")
        return S_OK()
Exemplo n.º 5
0
class ValidateOutputDataAgent( AgentModule ):

  def __init__( self, *args, **kwargs ):
    """ c'tor
    """
    AgentModule.__init__( self, *args, **kwargs )

    self.integrityClient = DataIntegrityClient()
    self.fc = FileCatalog()
    self.transClient = TransformationClient()
    self.fileCatalogClient = FileCatalogClient()

    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = agentTSTypes
    else:
      self.transformationTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )

    self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', ['TransformationDB',
                                                                                  'MetadataCatalog'] ) )
    self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) )
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.enableFlag = True

  #############################################################################

  def initialize( self ):
    """ Sets defaults
    """
    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    gLogger.info( "Will treat the following transformation types: %s" % str( self.transformationTypes ) )
    gLogger.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    gLogger.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    gLogger.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    return S_OK()

  #############################################################################

  def execute( self ):
    """ The VerifyOutputData execution method
    """
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if not self.enableFlag == 'True':
      self.log.info( "VerifyOutputData is disabled by configuration option 'EnableFlag'" )
      return S_OK( 'Disabled via CS flag' )

    gLogger.info( "-" * 40 )
    self.updateWaitingIntegrity()
    gLogger.info( "-" * 40 )

    res = self.transClient.getTransformations( {'Status':'ValidatingOutput', 'Type':self.transformationTypes} )
    if not res['OK']:
      gLogger.error( "Failed to get ValidatingOutput transformations", res['Message'] )
      return res
    transDicts = res['Value']
    if not transDicts:
      gLogger.info( "No transformations found in ValidatingOutput status" )
      return S_OK()
    gLogger.info( "Found %s transformations in ValidatingOutput status" % len( transDicts ) )
    for transDict in transDicts:
      transID = transDict['TransformationID']
      res = self.checkTransformationIntegrity( int( transID ) )
      if not res['OK']:
        gLogger.error( "Failed to perform full integrity check for transformation %d" % transID )
      else:
        self.finalizeCheck( transID )
        gLogger.info( "-" * 40 )
    return S_OK()

  def updateWaitingIntegrity( self ):
    """ Get 'WaitingIntegrity' transformations, update to 'ValidatedOutput'
    """
    gLogger.info( "Looking for transformations in the WaitingIntegrity status to update" )
    res = self.transClient.getTransformations( {'Status':'WaitingIntegrity'} )
    if not res['OK']:
      gLogger.error( "Failed to get WaitingIntegrity transformations", res['Message'] )
      return res
    transDicts = res['Value']
    if not transDicts:
      gLogger.info( "No transformations found in WaitingIntegrity status" )
      return S_OK()
    gLogger.info( "Found %s transformations in WaitingIntegrity status" % len( transDicts ) )
    for transDict in transDicts:
      transID = transDict['TransformationID']
      gLogger.info( "-" * 40 )
      res = self.integrityClient.getTransformationProblematics( int( transID ) )
      if not res['OK']:
        gLogger.error( "Failed to determine waiting problematics for transformation", res['Message'] )
      elif not res['Value']:
        res = self.transClient.setTransformationParameter( transID, 'Status', 'ValidatedOutput' )
        if not res['OK']:
          gLogger.error( "Failed to update status of transformation %s to ValidatedOutput" % ( transID ) )
        else:
          gLogger.info( "Updated status of transformation %s to ValidatedOutput" % ( transID ) )
      else:
        gLogger.info( "%d problematic files for transformation %s were found" % ( len( res['Value'] ), transID ) )
    return

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ Get the directories for the supplied transformation from the transformation system
    """
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        gLogger.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      transDirectories = res['Value'].splitlines()
      directories = self._addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.fileCatalogClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        gLogger.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )
    if not directories:
      gLogger.info( "No output directories found" )
    directories = sorted( directories )
    return S_OK( directories )

  @staticmethod
  def _addDirs( transID, newDirs, existingDirs ):
    for nDir in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, nDir ):
        if not nDir in existingDirs:
          existingDirs.append( nDir )
    return existingDirs

  #############################################################################
  def checkTransformationIntegrity( self, transID ):
    """ This method contains the real work
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Checking the integrity of transformation %s" % transID )
    gLogger.info( "-" * 40 )

    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      return res
    directories = res['Value']
    if not directories:
      return S_OK()

    ######################################################
    #
    # This check performs Catalog->SE for possible output directories
    #
    res = self.fc.exists( directories )
    if not res['OK']:
      gLogger.error( res['Message'] )
      return res
    for directory, error in res['Value']['Failed']:
      gLogger.error( 'Failed to determine existance of directory', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to determine the existance of directories" )
    directoryExists = res['Value']['Successful']
    for directory in sorted( directoryExists.keys() ):
      if not directoryExists[directory]:
        continue
      iRes = self.integrityClient.catalogDirectoryToSE( directory )
      if not iRes['OK']:
        gLogger.error( iRes['Message'] )
        return iRes

    ######################################################
    #
    # This check performs SE->Catalog for possible output directories
    #
    for storageElementName in sorted( self.activeStorages ):
      res = self.integrityClient.storageDirectoryToCatalog( directories, storageElementName )
      if not res['OK']:
        gLogger.error( res['Message'] )
        return res

    gLogger.info( "-" * 40 )
    gLogger.info( "Completed integrity check for transformation %s" % transID )
    return S_OK()

  def finalizeCheck( self, transID ):
    """ Move to 'WaitingIntegrity' or 'ValidatedOutput'
    """
    res = self.integrityClient.getTransformationProblematics( int( transID ) )
    if not res['OK']:
      gLogger.error( "Failed to determine whether there were associated problematic files", res['Message'] )
      newStatus = ''
    elif res['Value']:
      gLogger.info( "%d problematic files for transformation %s were found" % ( len( res['Value'] ), transID ) )
      newStatus = "WaitingIntegrity"
    else:
      gLogger.info( "No problematics were found for transformation %s" % transID )
      newStatus = "ValidatedOutput"
    if newStatus:
      res = self.transClient.setTransformationParameter( transID, 'Status', newStatus )
      if not res['OK']:
        gLogger.error( "Failed to update status of transformation %s to %s" % ( transID, newStatus ) )
      else:
        gLogger.info( "Updated status of transformation %s to %s" % ( transID, newStatus ) )
    gLogger.info( "-" * 40 )
    return S_OK()
Exemplo n.º 6
0
class TransformationCLI(CLI, API):
    def __init__(self):
        self.server = TransformationClient()
        self.indentSpace = 4
        CLI.__init__(self)
        API.__init__(self)

    def printPair(self, key, value, separator=":"):
        valueList = value.split("\n")
        print "%s%s%s %s" % (key, " " * (self.indentSpace - len(key)),
                             separator, valueList[0].strip())
        for valueLine in valueList[1:-1]:
            print "%s  %s" % (" " * self.indentSpace, valueLine.strip())

    def do_help(self, args):
        """ Default version of the help command
       Usage: help <command>
       OR use helpall to see description for all commands"""
        CLI.do_help(self, args)

    # overriting default help command
    def do_helpall(self, args):
        """
    Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
    """
        if len(args) == 0:
            print "\nAvailable commands:\n"
            attrList = dir(self)
            attrList.sort()
            for attribute in attrList:
                if attribute.find("do_") == 0:
                    self.printPair(attribute[3:],
                                   getattr(self, attribute).__doc__[1:])
                    print ""
        else:
            command = args.split()[0].strip()
            try:
                obj = getattr(self, "do_%s" % command)
            except:
                print "There's no such %s command" % command
                return
            self.printPair(command, obj.__doc__[1:])

    def do_shell(self, args):
        """Execute a shell command

       usage !<shell_command>
    """
        comm = args
        res = shellCall(0, comm)
        if res['OK'] and res['Value'][0] == 0:
            _returnCode, stdOut, stdErr = res['Value']
            print "%s\n%s" % (stdOut, stdErr)
        else:
            print res['Message']

    def check_params(self, args, num):
        """Checks if the number of parameters correct"""
        argss = args.split()
        length = len(argss)
        if length < num:
            print "Error: Number of arguments provided %d less that required %d, please correct." % (
                length, num)
            return (False, length)
        return (argss, length)

    def check_id_or_name(self, id_or_name):
        """resolve name or Id by converting type of argument """
        if id_or_name.isdigit():
            return long(id_or_name)  # its look like id
        return id_or_name

    ####################################################################
    #
    # These are the methods for transformation manipulation
    #

    def do_getall(self, args):
        """Get transformation details

       usage: getall [Status] [Status]
    """
        oTrans = Transformation()
        oTrans.getTransformations(transStatus=args.split(), printOutput=True)

    def do_getAllByUser(self, args):
        """Get all transformations created by a given user

The first argument is the authorDN or username. The authorDN
is preferred: it need to be inside quotes because contains
white spaces. Only authorDN should be quoted.

When the username is provided instead, 
the authorDN is retrieved from the uploaded proxy,
so that the retrieved transformations are those created by
the user who uploaded that proxy: that user could be different
that the username provided to the function.

       usage: getAllByUser authorDN or username [Status] [Status]
    """
        oTrans = Transformation()
        argss = args.split()
        username = ""
        author = ""
        status = []
        if not len(argss) > 0:
            print self.do_getAllByUser.__doc__
            return

        # if the user didnt quoted the authorDN ends
        if '=' in argss[0] and argss[0][0] not in ["'", '"']:
            print "AuthorDN need to be quoted (just quote that argument)"
            return

        if argss[0][0] in ["'", '"']:  # authorDN given
            author = argss[0]
            status_idx = 1
            for arg in argss[1:]:
                author += ' ' + arg
                status_idx += 1
                if arg[-1] in ["'", '"']:
                    break
            # At this point we should have something like 'author'
            if not author[0] in ["'", '"'] or not author[-1] in ["'", '"']:
                print "AuthorDN need to be quoted (just quote that argument)"
                return
            else:
                author = author[1:-1]  # throw away the quotes
            # the rest are the requested status
            status = argss[status_idx:]
        else:  # username given
            username = argss[0]
            status = argss[1:]

        oTrans.getTransformationsByUser(authorDN=author,
                                        userName=username,
                                        transStatus=status,
                                        printOutput=True)

    def do_summaryTransformations(self, args):
        """Show the summary for a list of Transformations

    Fields starting with 'F' ('J')  refers to files (jobs).
    Proc. stand for processed.

        Usage: summaryTransformations <ProdID> [<ProdID> ...]
    """
        argss = args.split()
        if not len(argss) > 0:
            print self.do_summaryTransformations.__doc__
            return

        transid = argss
        oTrans = Transformation()
        oTrans.getSummaryTransformations(transID=transid)

    def do_getStatus(self, args):
        """Get transformation details

       usage: getStatus <transName|ID>
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        for transName in argss:
            res = self.server.getTransformation(transName)
            if not res['OK']:
                print "Getting status of %s failed: %s" % (transName,
                                                           res['Message'])
            else:
                print "%s: %s" % (transName, res['Value']['Status'])

    def do_setStatus(self, args):
        """Set transformation status

       usage: setStatus  <Status> <transName|ID>
       Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
    """
        argss = args.split()
        if not len(argss) > 1:
            print "transformation and status not supplied"
            return
        status = argss[0]
        transNames = argss[1:]
        for transName in transNames:
            res = self.server.setTransformationParameter(
                transName, 'Status', status)
            if not res['OK']:
                print "Setting status of %s failed: %s" % (transName,
                                                           res['Message'])
            else:
                print "%s set to %s" % (transName, status)

    def do_start(self, args):
        """Start transformation

       usage: start <transName|ID>
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        for transName in argss:
            res = self.server.setTransformationParameter(
                transName, 'Status', 'Active')
            if not res['OK']:
                print "Setting Status of %s failed: %s" % (transName,
                                                           res['Message'])
            else:
                res = self.server.setTransformationParameter(
                    transName, 'AgentType', 'Automatic')
                if not res['OK']:
                    print "Setting AgentType of %s failed: %s" % (
                        transName, res['Message'])
                else:
                    print "%s started" % transName

    def do_stop(self, args):
        """Stop transformation

       usage: stop <transID|ID>
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        for transName in argss:
            res = self.server.setTransformationParameter(
                transName, 'AgentType', 'Manual')
            if not res['OK']:
                print "Stopping of %s failed: %s" % (transName, res['Message'])
            else:
                print "%s stopped" % transName

    def do_flush(self, args):
        """Flush transformation

       usage: flush <transName|ID>
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        for transName in argss:
            res = self.server.setTransformationParameter(
                transName, 'Status', 'Flush')
            if not res['OK']:
                print "Flushing of %s failed: %s" % (transName, res['Message'])
            else:
                print "%s flushing" % transName

    def do_get(self, args):
        """Get transformation definition

    usage: get <transName|ID>
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print "Failed to get %s: %s" % (transName, res['Message'])
        else:
            res['Value'].pop('Body')
            printDict(res['Value'])

    def do_getBody(self, args):
        """Get transformation body

    usage: getBody <transName|ID>
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print "Failed to get %s: %s" % (transName, res['Message'])
        else:
            print res['Value']['Body']

    def do_getFileStat(self, args):
        """Get transformation file statistics

     usage: getFileStat <transName|ID>
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        transName = argss[0]
        res = self.server.getTransformationStats(transName)
        if not res['OK']:
            print "Failed to get statistics for %s: %s" % (transName,
                                                           res['Message'])
        else:
            res['Value'].pop('Total')
            printDict(res['Value'])

    def do_modMask(self, args):
        """Modify transformation input definition

       usage: modInput <mask> <transName|ID>
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        mask = argss[0]
        transNames = argss[1:]
        for transName in transNames:
            res = self.server.setTransformationParameter(
                transName, "FileMask", mask)
            if not res['OK']:
                print "Failed to modify input file mask for %s: %s" % (
                    transName, res['Message'])
            else:
                print "Updated %s filemask" % transName

    def do_getFiles(self, args):
        """Get files for the transformation (optionally with a given status)

    usage: getFiles <transName|ID> [Status] [Status]
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        transName = argss[0]
        status = argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print "Failed to get transformation information: %s" % res[
                'Message']
        else:
            selectDict = {'TransformationID': res['Value']['TransformationID']}
            if status:
                selectDict['Status'] = status
            res = self.server.getTransformationFiles(condDict=selectDict)
            if not res['OK']:
                print "Failed to get transformation files: %s" % res['Message']
            elif res['Value']:
                self._printFormattedDictList(
                    res['Value'],
                    ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                    'LFN', 'LFN')
            else:
                print "No files found"

    def do_getFileStatus(self, args):
        """Get file(s) status for the given transformation

    usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
    """
        argss = args.split()
        if len(argss) < 2:
            print "transformation and file not supplied"
            return
        transName = argss[0]
        lfns = argss[1:]

        res = self.server.getTransformation(transName)
        if not res['OK']:
            print "Failed to get transformation information: %s" % res[
                'Message']
        else:
            selectDict = {'TransformationID': res['Value']['TransformationID']}
            res = self.server.getTransformationFiles(condDict=selectDict)
            if not res['OK']:
                print "Failed to get transformation files: %s" % res['Message']
            elif res['Value']:
                filesList = []
                for fileDict in res['Value']:
                    if fileDict['LFN'] in lfns:
                        filesList.append(fileDict)
                if filesList:
                    self._printFormattedDictList(filesList, [
                        'LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'
                    ], 'LFN', 'LFN')
                else:
                    print "Could not find any LFN in", lfns, "for transformation", transName
            else:
                print "No files found"

    def do_getOutputFiles(self, args):
        """Get output files for the transformation

    usage: getOutputFiles <transName|ID>
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print "Failed to get transformation information: %s" % res[
                'Message']
        else:
            fc = FileCatalog()
            meta = {}
            meta['ProdID'] = transName
            res = fc.findFilesByMetadata(meta)
            if not res['OK']:
                print res['Message']
                return
            if not len(res['Value']) > 0:
                print 'No output files yet for transformation %d' % int(
                    transName)
                return
            else:
                for lfn in res['Value']:
                    print lfn

    def do_getInputDataQuery(self, args):
        """Get input data query for the transformation

    usage: getInputDataQuery <transName|ID>
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        transName = argss[0]
        res = self.server.getTransformationInputDataQuery(transName)
        if not res['OK']:
            print "Failed to get transformation input data query: %s" % res[
                'Message']
        else:
            print res['Value']

    def do_setFileStatus(self, args):
        """Set file status for the given transformation

    usage: setFileStatus <transName|ID> <lfn> <status>
    """
        argss = args.split()
        if not len(argss) == 3:
            print "transformation file and status not supplied"
            return
        transName = argss[0]
        lfn = argss[1]
        status = argss[2]
        res = self.server.setFileStatusForTransformation(
            transName, status, [lfn])
        if not res['OK']:
            print "Failed to update file status: %s" % res['Message']
        else:
            print "Updated file status to %s" % status

    def do_resetFile(self, args):
        """Reset file status for the given transformation

    usage: resetFile <transName|ID> <lfns>
    """
        argss = args.split()
        if not len(argss) > 1:
            print "transformation and file(s) not supplied"
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.setFileStatusForTransformation(
            transName, 'Unused', lfns)
        if not res['OK']:
            print "Failed to reset file status: %s" % res['Message']
        else:
            if 'Failed' in res['Value']:
                print "Could not reset some files: "
                for lfn, reason in res['Value']['Failed'].items():
                    print lfn, reason
            else:
                print "Updated file statuses to 'Unused' for %d file(s)" % len(
                    lfns)

    def do_resetProcessedFile(self, args):
        """ Reset file status for the given transformation
        usage: resetFile <transName|ID> <lfn>
    """
        argss = args.split()

        if not len(argss) > 1:
            print "transformation and file(s) not supplied"
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.setFileStatusForTransformation(transName,
                                                         'Unused',
                                                         lfns,
                                                         force=True)
        if not res['OK']:
            print "Failed to reset file status: %s" % res['Message']
        else:
            if 'Failed' in res['Value'] and res['Value']['Failed']:
                print "Could not reset some files: "
                for lfn, reason in res['Value']['Failed'].items():
                    print lfn, reason
            else:
                print "Updated file statuses to 'Unused' for %d file(s)" % len(
                    lfns)

    ####################################################################
    #
    # These are the methods for file manipulation
    #

    def do_addDirectory(self, args):
        """Add files from the given catalog directory

    usage: addDirectory <directory> [directory]
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no directory supplied"
            return
        for directory in argss:
            res = self.server.addDirectory(directory, force=True)
            if not res['OK']:
                print 'failed to add directory %s: %s' % (directory,
                                                          res['Message'])
            else:
                print 'added %s files for %s' % (res['Value'], directory)

    def do_replicas(self, args):
        """ Get replicas for <path>

        usage: replicas <lfn> [lfn]
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no files supplied"
            return
        res = self.server.getReplicas(argss)
        if not res['OK']:
            print "failed to get any replica information: %s" % res['Message']
            return
        for lfn in sorted(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to get replica information for %s: %s" % (lfn, error)
        for lfn in sorted(res['Value']['Successful'].keys()):
            ses = sorted(res['Value']['Successful'][lfn].keys())
            outStr = "%s :" % lfn.ljust(100)
            for se in ses:
                outStr = "%s %s" % (outStr, se.ljust(15))
            print outStr

    def do_addFile(self, args):
        """Add new files to transformation DB

    usage: addFile <lfn> [lfn]
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no files supplied"
            return
        lfnDict = {}
        for lfn in argss:
            lfnDict[lfn] = {
                'PFN': 'IGNORED-PFN',
                'SE': 'IGNORED-SE',
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        res = self.server.addFile(lfnDict, force=True)
        if not res['OK']:
            print "failed to add any files: %s" % res['Message']
            return
        for lfn in sorted(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to add %s: %s" % (lfn, error)
        for lfn in sorted(res['Value']['Successful'].keys()):
            print "added %s" % lfn

    def do_removeFile(self, args):
        """Remove file from transformation DB

    usage: removeFile <lfn> [lfn]
    """
        argss = args.split()
        if not len(argss) > 0:
            print "no files supplied"
            return
        res = self.server.removeFile(argss)
        if not res['OK']:
            print "failed to remove any files: %s" % res['Message']
            return
        for lfn in sorted(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to remove %s: %s" % (lfn, error)
        for lfn in sorted(res['Value']['Successful'].keys()):
            print "removed %s" % lfn

    def do_addReplica(self, args):
        """ Add new replica to the transformation DB

    usage: addReplica <lfn> <se>
    """
        argss = args.split()
        if not len(argss) == 2:
            print "no file info supplied"
            return
        lfn = argss[0]
        se = argss[1]
        lfnDict = {}
        lfnDict[lfn] = {
            'PFN': 'IGNORED-PFN',
            'SE': se,
            'Size': 0,
            'GUID': 'IGNORED-GUID',
            'Checksum': 'IGNORED-CHECKSUM'
        }
        res = self.server.addReplica(lfnDict, force=True)
        if not res['OK']:
            print "failed to add replica: %s" % res['Message']
            return
        for lfn in sorted(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to add replica: %s" % (error)
        for lfn in sorted(res['Value']['Successful'].keys()):
            print "added %s" % lfn

    def do_removeReplica(self, args):
        """Remove replica from the transformation DB

    usage: removeReplica <lfn> <se>
    """
        argss = args.split()
        if not len(argss) == 2:
            print "no file info supplied"
            return
        lfn = argss[0]
        se = argss[1]
        lfnDict = {}
        lfnDict[lfn] = {
            'PFN': 'IGNORED-PFN',
            'SE': se,
            'Size': 0,
            'GUID': 'IGNORED-GUID',
            'Checksum': 'IGNORED-CHECKSUM'
        }
        res = self.server.removeReplica(lfnDict)
        if not res['OK']:
            print "failed to remove replica: %s" % res['Message']
            return
        for lfn in sorted(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to remove replica: %s" % (error)
        for lfn in sorted(res['Value']['Successful'].keys()):
            print "removed %s" % lfn

    def do_setReplicaStatus(self, args):
        """Set replica status, usually used to mark a replica Problematic

    usage: setReplicaStatus <lfn> <status> <se>
    """
        argss = args.split()
        if not len(argss) > 2:
            print "no file info supplied"
            return
        lfn = argss[0]
        status = argss[1]
        se = argss[2]
        lfnDict = {}
        lfnDict[lfn] = {
            'Status': status,
            'PFN': 'IGNORED-PFN',
            'SE': se,
            'Size': 0,
            'GUID': 'IGNORED-GUID',
            'Checksum': 'IGNORED-CHECKSUM'
        }
        res = self.server.setReplicaStatus(lfnDict)
        if not res['OK']:
            print "failed to set replica status: %s" % res['Message']
            return
        for lfn in sorted(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to set replica status: %s" % (error)
        for lfn in sorted(res['Value']['Successful'].keys()):
            print "updated replica status %s" % lfn
Exemplo n.º 7
0
class Transformation( API ):

  #############################################################################
  def __init__( self, transID = 0, transClient = None ):
    """ c'tor
    """
    super( Transformation, self ).__init__()

    self.paramTypes = { 'TransformationID'      : [types.IntType, types.LongType],
                          'TransformationName'    : types.StringTypes,
                          'Status'                : types.StringTypes,
                          'Description'           : types.StringTypes,
                          'LongDescription'       : types.StringTypes,
                          'Type'                  : types.StringTypes,
                          'Plugin'                : types.StringTypes,
                          'AgentType'             : types.StringTypes,
                          'FileMask'              : types.StringTypes,
                          'TransformationGroup'   : types.StringTypes,
                          'GroupSize'             : [types.IntType, types.LongType, types.FloatType],
                          'InheritedFrom'         : [types.IntType, types.LongType],
                          'Body'                  : types.StringTypes,
                          'MaxNumberOfTasks'      : [types.IntType, types.LongType],
                          'EventsPerTask'         : [types.IntType, types.LongType]}
    self.paramValues = { 'TransformationID'      : 0,
                          'TransformationName'    : '',
                          'Status'                : 'New',
                          'Description'           : '',
                          'LongDescription'       : '',
                          'Type'                  : '',
                          'Plugin'                : 'Standard',
                          'AgentType'             : 'Manual',
                          'FileMask'              : '',
                          'TransformationGroup'   : 'General',
                          'GroupSize'             : 1,
                          'InheritedFrom'         : 0,
                          'Body'                  : '',
                          'MaxNumberOfTasks'       : 0,
                          'EventsPerTask'          : 0}

    self.supportedPlugins = ['Broadcast', 'Standard', 'BySize', 'ByShare']
    if not transClient:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient
    self.serverURL = self.transClient.getServer()
    self.exists = False
    if transID:
      self.paramValues['TransformationID'] = transID
      res = self.getTransformation()
      if res['OK']:
        self.exists = True
      elif res['Message'] == 'Transformation does not exist':
        raise AttributeError, 'TransformationID %d does not exist' % transID
      else:
        self.paramValues['TransformationID'] = 0
        gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID, self.transClient.serverURL ) )

  def setServer( self, server ):
    self.serverURL = server
    self.transClient.setServer( self.serverURL )

  def getServer( self ):
    return self.serverURL

  def reset( self, transID = 0 ):
    self.__init__( transID )
    self.transClient.setServer( self.serverURL )
    return S_OK()

  def setTargetSE( self, seList ):
    return self.__setSE( 'TargetSE', seList )

  def setSourceSE( self, seList ):
    return self.__setSE( 'SourceSE', seList )

  def __setSE( self, se, seList ):
    if type( seList ) in types.StringTypes:
      try:
        seList = eval( seList )
      except:
        seList = seList.replace( ',', ' ' ).split()
    res = self.__checkSEs( seList )
    if not res['OK']:
      return res
    self.item_called = se
    return self.__setParam( seList )

  def __getattr__( self, name ):
    if name.find( 'get' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__getParam
    if name.find( 'set' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__setParam
    raise AttributeError, name

  def __getParam( self ):
    if self.item_called == 'Available':
      return S_OK( self.paramTypes.keys() )
    if self.item_called == 'Parameters':
      return S_OK( self.paramValues )
    if self.item_called in self.paramValues:
      return S_OK( self.paramValues[self.item_called] )
    raise AttributeError, "Unknown parameter for transformation: %s" % self.item_called

  def __setParam( self, value ):
    change = False
    if self.item_called in self.paramTypes:
      oldValue = self.paramValues[self.item_called]
      if oldValue != value:
        if type( value ) in self.paramTypes[self.item_called]:
          change = True
        else:
          raise TypeError, "%s %s %s expected one of %s" % ( self.item_called, value, type( value ), self.paramTypes[self.item_called] )
    if not self.item_called in self.paramTypes.keys():
      if not self.paramValues.has_key( self.item_called ):
        change = True
      else:
        oldValue = self.paramValues[self.item_called]
        if oldValue != value:
          change = True
    if not change:
      gLogger.verbose( "No change of parameter %s required" % self.item_called )
    else:
      gLogger.verbose( "Parameter %s to be changed" % self.item_called )
      transID = self.paramValues['TransformationID']
      if self.exists and transID:
        res = self.transClient.setTransformationParameter( transID, self.item_called, value )
        if not res['OK']:
          return res
      self.paramValues[self.item_called] = value
    return S_OK()

  def getTransformation( self, printOutput = False ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformation( transID, extraParams = True )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setter = None
      setterName = "set%s" % paramName
      if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
        setter = getattr( self, setterName )
      if not setterName:
        gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName )
        continue
      setter( paramValue )
    if printOutput:
      gLogger.info( "No printing available yet" )
    return S_OK( transParams )

  def getTransformationLogging( self, printOutput = False ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformationLogging( transID )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    loggingList = res['Value']
    if printOutput:
      self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' )
    return S_OK( loggingList )

  def extendTransformation( self, nTasks, printOutput = False ):
    return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput )

  def cleanTransformation( self, printOutput = False ):
    res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput )
    if res['OK']:
      self.paramValues['Status'] = 'Cleaned'
    return res

  def deleteTransformation( self, printOutput = False ):
    res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput )
    if res['OK']:
      self.reset()
    return res

  def addFilesToTransformation( self, lfns, printOutput = False ):
    return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput )

  def setFileStatusForTransformation( self, status, lfns, printOutput = False ):
    return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput )

  def getTransformationTaskStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput )

  def getTransformationStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationStats', printOutput = printOutput )

  def deleteTasks( self, taskMin, taskMax, printOutput = False ):
    return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput )

  def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ):
    return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput )

  def setTaskStatus( self, taskID, status, printOutput = False ):
    return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput )

  def __executeOperation( self, operation, *parms, **kwds ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    printOutput = kwds.pop( 'printOutput' )
    fcn = None
    if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ):
      fcn = getattr( self.transClient, operation )
    if not fcn:
      return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" )
    res = fcn( transID, *parms, **kwds )
    if printOutput:
      self._prettyPrint( res )
    return res

  def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID', 'TargetSE', 'UsedSE', 'ErrorCount', 'InsertedTime', 'LastUpdate'], orderBy = 'FileID', printOutput = False ):
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % string.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy )
    return res

  def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID', 'ExternalStatus', 'ExternalID', 'TargetSE', 'CreationTime', 'LastUpdateTime'], orderBy = 'TaskID', printOutput = False ):
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % string.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy )
    return res

  #############################################################################
  def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status', 'AgentType', 'TransformationName', 'CreationDate'], orderBy = 'TransformationID', printOutput = False ):
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % string.join( res['ParameterNames'] ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def addTransformation( self, addFiles = True, printOutput = False ):
    res = self._checkCreation()
    if not res['OK']:
      return self._errorReport( res, 'Failed transformation sanity check' )
    if printOutput:
      gLogger.info( "Will attempt to create transformation with the following parameters" )
      self._prettyPrint( self.paramValues )

    res = self.transClient.addTransformation( self.paramValues['TransformationName'],
                                             self.paramValues['Description'],
                                             self.paramValues['LongDescription'],
                                             self.paramValues['Type'],
                                             self.paramValues['Plugin'],
                                             self.paramValues['AgentType'],
                                             self.paramValues['FileMask'],
                                             transformationGroup = self.paramValues['TransformationGroup'],
                                             groupSize = self.paramValues['GroupSize'],
                                             inheritedFrom = self.paramValues['InheritedFrom'],
                                             body = self.paramValues['Body'],
                                             maxTasks = self.paramValues['MaxNumberOfTasks'],
                                             eventsPerTask = self.paramValues['EventsPerTask'],
                                             addFiles = addFiles )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transID = res['Value']
    self.exists = True
    self.setTransformationID( transID )
    gLogger.info( "Created transformation %d" % transID )
    for paramName, paramValue in self.paramValues.items():
      if not self.paramTypes.has_key( paramName ):
        res = self.transClient.setTransformationParameter( transID, paramName, paramValue )
        if not res['OK']:
          gLogger.error( "Failed to add parameter", "%s %s" % ( paramName, res['Message'] ) )
          gLogger.info( "To add this parameter later please execute the following." )
          gLogger.info( "oTransformation = Transformation(%d)" % transID )
          gLogger.info( "oTransformation.set%s(...)" % paramName )
    return S_OK( transID )

  def _checkCreation( self ):
    if self.paramValues['TransformationID']:
      gLogger.info( "You are currently working with an active transformation definition." )
      gLogger.info( "If you wish to create a new transformation reset the TransformationID." )
      gLogger.info( "oTransformation.reset()" )
      return S_ERROR()

    requiredParameters = ['TransformationName', 'Description' , 'LongDescription', 'Type']
    for parameter in requiredParameters:
      if not self.paramValues[parameter]:
        gLogger.info( "%s is not defined for this transformation. This is required..." % parameter )
        res = self.__promptForParameter( parameter )
        if not res['OK']:
          return res

    plugin = self.paramValues['Plugin']
    if not plugin in self.supportedPlugins:
      gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin )
      res = self.__promptForParameter( 'Plugin', choices = self.supportedPlugins, default = 'Standard' )
      if not res['OK']:
        return res
    plugin = self.paramValues['Plugin']
    checkPlugin = "_check%sPlugin" % plugin
    fcn = None
    if hasattr( self, checkPlugin ) and callable( getattr( self, checkPlugin ) ):
      fcn = getattr( self, checkPlugin )
    if not fcn:
      return S_ERROR( "Unable to invoke %s, it isn't a member function" % checkPlugin )
    res = fcn()
    return res

  def _checkBySizePlugin( self ):
    return self._checkStandardPlugin()

  def _checkBySharePlugin( self ):
    return self._checkStandardPlugin()

  def _checkStandardPlugin( self ):
    groupSize = self.paramValues['GroupSize']
    if ( groupSize <= 0 ):
      gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." )
      res = self.setGroupSize( 1 )
      if not res['OK']:
        return res
    return S_OK()

  def _checkBroadcastPlugin( self ):
    gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % ( string.join( ['SourceSE', 'TargetSE'], ', ' ) ) )
    requiredParams = ['SourceSE', 'TargetSE']
    for requiredParam in requiredParams:
      if ( not self.paramValues.has_key( requiredParam ) ) or ( not self.paramValues[requiredParam] ):
        res = self.__promptForParameter( requiredParam, insert = False )
        if not res['OK']:
          return res
        paramValue = res['Value']
        setter = None
        setterName = "set%s" % requiredParam
        if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
          setter = getattr( self, setterName )
        if not setter:
          return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." % setterName )
        ses = paramValue.replace( ',', ' ' ).split()
        res = setter( ses )
        if not res['OK']:
          return res
    return S_OK()

  def __checkSEs( self, seList ):
    res = gConfig.getSections( '/Resources/StorageElements' )
    if not res['OK']:
      return self._errorReport( res, 'Failed to get possible StorageElements' )
    missing = []
    for se in seList:
      if not se in res['Value']:
        gLogger.error( "StorageElement %s is not known" % se )
        missing.append( se )
    if missing:
      return S_ERROR( "%d StorageElements not known" % len( missing ) )
    return S_OK()

  def __promptForParameter( self, parameter, choices = [], default = '', insert = True ):
    res = promptUser( "Please enter %s" % parameter, choices = choices, default = default )
    if not res['OK']:
      return self._errorReport( res )
    gLogger.info( "%s will be set to '%s'" % ( parameter, res['Value'] ) )
    paramValue = res['Value']
    if insert:
      setter = None
      setterName = "set%s" % parameter
      if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
        setter = getattr( self, setterName )
      if not setter:
        return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" )
      res = setter( paramValue )
      if not res['OK']:
        return res
    return S_OK( paramValue )
class TransformationCleaningAgent(AgentModule):
  """
  .. class:: TransformationCleaningAgent

  :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance
  :param ~TransformationClient.TransformationClient transClient: TransformationClient instance
  :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance

  """

  def __init__(self, *args, **kwargs):
    """ c'tor
    """
    AgentModule.__init__(self, *args, **kwargs)

    self.shifterProxy = None

    # # transformation client
    self.transClient = None
    # # wms client
    self.wmsClient = None
    # # request client
    self.reqClient = None
    # # file catalog client
    self.metadataClient = None

    # # transformations types
    self.transformationTypes = None
    # # directory locations
    self.directoryLocations = ['TransformationDB', 'MetadataCatalog']
    # # transformation metadata
    self.transfidmeta = 'TransformationID'
    # # archive periof in days
    self.archiveAfter = 7
    # # transformation log SEs
    self.logSE = 'LogSE'
    # # enable/disable execution
    self.enableFlag = 'True'

    self.dataProcTTypes = ['MCSimulation', 'Merge']
    self.dataManipTTypes = ['Replication', 'Removal']

  def initialize(self):
    """ agent initialisation

    reading and setting confing opts

    :param self: self reference
    """
    # # shifter proxy
    # See cleanContent method: this proxy will be used ALSO when the file catalog used
    # is the DIRAC File Catalog (DFC).
    # This is possible because of unset of the "UseServerCertificate" option
    self.shifterProxy = self.am_getOption('shifterProxy', self.shifterProxy)

    # # transformations types
    self.dataProcTTypes = Operations().getValue('Transformations/DataProcessing', self.dataProcTTypes)
    self.dataManipTTypes = Operations().getValue('Transformations/DataManipulation', self.dataManipTTypes)
    agentTSTypes = self.am_getOption('TransformationTypes', [])
    if agentTSTypes:
      self.transformationTypes = sorted(agentTSTypes)
    else:
      self.transformationTypes = sorted(self.dataProcTTypes + self.dataManipTTypes)
    self.log.info("Will consider the following transformation types: %s" % str(self.transformationTypes))
    # # directory locations
    self.directoryLocations = sorted(self.am_getOption('DirectoryLocations', self.directoryLocations))
    self.log.info("Will search for directories in the following locations: %s" % str(self.directoryLocations))
    # # transformation metadata
    self.transfidmeta = self.am_getOption('TransfIDMeta', self.transfidmeta)
    self.log.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta)
    # # archive periof in days
    self.archiveAfter = self.am_getOption('ArchiveAfter', self.archiveAfter)  # days
    self.log.info("Will archive Completed transformations after %d days" % self.archiveAfter)
    # # transformation log SEs
    self.logSE = Operations().getValue('/LogStorage/LogSE', self.logSE)
    self.log.info("Will remove logs found on storage element: %s" % self.logSE)

    # # transformation client
    self.transClient = TransformationClient()
    # # wms client
    self.wmsClient = WMSClient()
    # # request client
    self.reqClient = ReqClient()
    # # file catalog client
    self.metadataClient = FileCatalogClient()

    return S_OK()

  #############################################################################
  def execute(self):
    """ execution in one agent's cycle

    :param self: self reference
    """

    self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag)
    if self.enableFlag != 'True':
      self.log.info('TransformationCleaningAgent is disabled by configuration option EnableFlag')
      return S_OK('Disabled via CS flag')

    # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations({'Status': 'Cleaning',
                                               'Type': self.transformationTypes})
    if res['OK']:
      for transDict in res['Value']:
        if self.shifterProxy:
          self._executeClean(transDict)
        else:
          self.log.info("Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                        transDict)
          executeWithUserProxy(self._executeClean)(transDict,
                                                   proxyUserDN=transDict['AuthorDN'],
                                                   proxyUserGroup=transDict['AuthorGroup'])
    else:
      self.log.error("Failed to get transformations", res['Message'])

    # Obtain the transformations in RemovingFiles status and removes the output files
    res = self.transClient.getTransformations({'Status': 'RemovingFiles',
                                               'Type': self.transformationTypes})
    if res['OK']:
      for transDict in res['Value']:
        if self.shifterProxy:
          self._executeRemoval(transDict)
        else:
          self.log.info("Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                        transDict)
          executeWithUserProxy(self._executeRemoval)(transDict,
                                                     proxyUserDN=transDict['AuthorDN'],
                                                     proxyUserGroup=transDict['AuthorGroup'])
    else:
      self.log.error("Could not get the transformations", res['Message'])

    # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
    res = self.transClient.getTransformations({'Status': 'Completed',
                                               'Type': self.transformationTypes},
                                              older=olderThanTime,
                                              timeStamp='LastUpdate')
    if res['OK']:
      for transDict in res['Value']:
        if self.shifterProxy:
          self._executeArchive(transDict)
        else:
          self.log.info("Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                        transDict)
          executeWithUserProxy(self._executeArchive)(transDict,
                                                     proxyUserDN=transDict['AuthorDN'],
                                                     proxyUserGroup=transDict['AuthorGroup'])
    else:
      self.log.error("Could not get the transformations", res['Message'])
    return S_OK()

  def _executeClean(self, transDict):
    """Clean transformation."""
    # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
    # We just archive
    if transDict['Type'] in self.dataManipTTypes:
      res = self.archiveTransformation(transDict['TransformationID'])
      if not res['OK']:
        self.log.error("Problems archiving transformation %s: %s" % (transDict['TransformationID'],
                                                                     res['Message']))
    else:
      res = self.cleanTransformation(transDict['TransformationID'])
      if not res['OK']:
        self.log.error("Problems cleaning transformation %s: %s" % (transDict['TransformationID'],
                                                                    res['Message']))

  def _executeRemoval(self, transDict):
    """Remove files from given transformation."""
    res = self.removeTransformationOutput(transDict['TransformationID'])
    if not res['OK']:
      self.log.error("Problems removing transformation %s: %s" % (transDict['TransformationID'],
                                                                  res['Message']))

  def _executeArchive(self, transDict):
    """Archive the given transformation."""
    res = self.archiveTransformation(transDict['TransformationID'])
    if not res['OK']:
      self.log.error("Problems archiving transformation %s: %s" % (transDict['TransformationID'],
                                                                   res['Message']))

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories(self, transID):
    """ get the directories for the supplied transformation from the transformation system.
        These directories are used by removeTransformationOutput and cleanTransformation for removing output.

    :param self: self reference
    :param int transID: transformation ID
    """
    self.log.verbose("Cleaning Transformation directories of transformation %d" % transID)
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters(transID, ['OutputDirectories'])
      if not res['OK']:
        self.log.error("Failed to obtain transformation directories", res['Message'])
        return res
      transDirectories = []
      if res['Value']:
        if not isinstance(res['Value'], list):
          try:
            transDirectories = ast.literal_eval(res['Value'])
          except BaseException:
            # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]'
            transDirectories.append(res['Value'])
        else:
          transDirectories = res['Value']
      directories = self._addDirs(transID, transDirectories, directories)

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata({self.transfidmeta: transID})
      if not res['OK']:
        self.log.error("Failed to obtain metadata catalog directories", res['Message'])
        return res
      transDirectories = res['Value']
      directories = self._addDirs(transID, transDirectories, directories)

    if not directories:
      self.log.info("No output directories found")
    directories = sorted(directories)
    return S_OK(directories)

  @classmethod
  def _addDirs(cls, transID, newDirs, existingDirs):
    """ append unique :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    """
    for folder in newDirs:
      transStr = str(transID).zfill(8)
      if re.search(transStr, str(folder)):
        if folder not in existingDirs:
          existingDirs.append(os.path.normpath(folder))
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanContent(self, directory):
    """ wipe out everything from catalog under folder :directory:

    :param self: self reference
    :params str directory: folder name
    """
    self.log.verbose("Cleaning Catalog contents")
    res = self.__getCatalogDirectoryContents([directory])
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      self.log.info("No files are registered in the catalog directory %s" % directory)
      return S_OK()
    self.log.info("Attempting to remove %d possible remnants from the catalog and storage" % len(filesFound))

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')
    res = DataManager().removeFile(filesFound, force=True)
    gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')

    if not res['OK']:
      return res
    realFailure = False
    for lfn, reason in res['Value']['Failed'].items():
      if "File does not exist" in str(reason):
        self.log.warn("File %s not found in some catalog: " % (lfn))
      else:
        self.log.error("Failed to remove file found in the catalog", "%s %s" % (lfn, reason))
        realFailure = True
    if realFailure:
      return S_ERROR("Failed to remove all files found in the catalog")
    return S_OK()

  def __getCatalogDirectoryContents(self, directories):
    """ get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    """
    self.log.info('Obtaining the catalog contents for %d directories:' % len(directories))
    for directory in directories:
      self.log.info(directory)
    activeDirs = directories
    allFiles = {}
    fc = FileCatalog()
    while activeDirs:
      currentDir = activeDirs[0]
      res = returnSingleResult(fc.listDirectory(currentDir))
      activeDirs.remove(currentDir)
      if not res['OK'] and 'Directory does not exist' in res['Message']:  # FIXME: DFC should return errno
        self.log.info("The supplied directory %s does not exist" % currentDir)
      elif not res['OK']:
        if "No such file or directory" in res['Message']:
          self.log.info("%s: %s" % (currentDir, res['Message']))
        else:
          self.log.error("Failed to get directory %s content: %s" % (currentDir, res['Message']))
      else:
        dirContents = res['Value']
        activeDirs.extend(dirContents['SubDirs'])
        allFiles.update(dirContents['Files'])
    self.log.info("Found %d files" % len(allFiles))
    return S_OK(allFiles.keys())

  def cleanTransformationLogFiles(self, directory):
    """ clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    """
    self.log.verbose("Removing log files found in the directory %s" % directory)
    res = returnSingleResult(StorageElement(self.logSE).removeDirectory(directory, recursive=True))
    if not res['OK']:
      if cmpError(res, errno.ENOENT):  # No such file or directory
        self.log.warn("Transformation log directory does not exist", directory)
        return S_OK()
      self.log.error("Failed to remove log files", res['Message'])
      return res
    self.log.info("Successfully removed transformation log directory")
    return S_OK()

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput(self, transID):
    """ This just removes any mention of the output data from the catalog and storage """
    self.log.info("Removing output data for transformation %s" % transID)
    res = self.getTransformationDirectories(transID)
    if not res['OK']:
      self.log.error('Problem obtaining directories for transformation %s with result "%s"' % (transID, res))
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search('/LOG/', directory):
        res = self.cleanContent(directory)
        if not res['OK']:
          return res

    self.log.info("Removed %d directories from the catalog \
      and its files from the storage for transformation %s" % (len(directories), transID))
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles(transID)
    if not res['OK']:
      return res
    self.log.info("Successfully removed output of transformation %d" % transID)
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter(transID, 'Status', 'RemovedFiles')
    if not res['OK']:
      self.log.error("Failed to update status of transformation %s to RemovedFiles" % (transID), res['Message'])
      return res
    self.log.info("Updated status of transformation %s to RemovedFiles" % (transID))
    return S_OK()

  def archiveTransformation(self, transID):
    """ This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    """
    self.log.info("Archiving transformation %s" % transID)
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks(transID)
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation(transID)
    if not res['OK']:
      return res
    self.log.info("Successfully archived transformation %d" % transID)
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter(transID, 'Status', 'Archived')
    if not res['OK']:
      self.log.error("Failed to update status of transformation %s to Archived" % (transID), res['Message'])
      return res
    self.log.info("Updated status of transformation %s to Archived" % (transID))
    return S_OK()

  def cleanTransformation(self, transID):
    """ This removes what was produced by the supplied transformation,
        leaving only some info and log in the transformation DB.
    """
    self.log.info("Cleaning transformation %s" % transID)
    res = self.getTransformationDirectories(transID)
    if not res['OK']:
      self.log.error('Problem obtaining directories for transformation %s with result "%s"' % (transID, res))
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks(transID)
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search('/LOG/', directory):
        res = self.cleanTransformationLogFiles(directory)
        if not res['OK']:
          return res
      res = self.cleanContent(directory)
      if not res['OK']:
        return res

    # Clean ALL the possible remnants found
    res = self.cleanMetadataCatalogFiles(transID)
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation(transID)
    if not res['OK']:
      return res
    self.log.info("Successfully cleaned transformation %d" % transID)
    res = self.transClient.setTransformationParameter(transID, 'Status', 'Cleaned')
    if not res['OK']:
      self.log.error("Failed to update status of transformation %s to Cleaned" % (transID), res['Message'])
      return res
    self.log.info("Updated status of transformation %s to Cleaned" % (transID))
    return S_OK()

  def cleanMetadataCatalogFiles(self, transID):
    """ wipe out files from catalog """
    res = self.metadataClient.findFilesByMetadata({self.transfidmeta: transID})
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info('No files found for transID %s' % transID)
      return S_OK()

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')
    res = DataManager().removeFile(fileToRemove, force=True)
    gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'true')

    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error("Failed to remove file found in metadata catalog", "%s %s" % (lfn, reason))
    if res['Value']['Failed']:
      return S_ERROR("Failed to remove all files found in the metadata catalog")
    self.log.info("Successfully removed all files found in the BK")
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks(self, transID):
    """ clean tasks from WMS, or from the RMS if it is a DataManipulation transformation
    """
    self.log.verbose("Cleaning Transformation tasks of transformation %d" % transID)
    res = self.__getTransformationExternalIDs(transID)
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if externalIDs:
      res = self.transClient.getTransformationParameters(transID, ['Type'])
      if not res['OK']:
        self.log.error("Failed to determine transformation type")
        return res
      transType = res['Value']
      if transType in self.dataProcTTypes:
        res = self.__removeWMSTasks(externalIDs)
      else:
        res = self.__removeRequests(externalIDs)
      if not res['OK']:
        return res
    return S_OK()

  def __getTransformationExternalIDs(self, transID):
    """ collect all ExternalIDs for transformation :transID:

    :param self: self reference
    :param int transID: transforamtion ID
    """
    res = self.transClient.getTransformationTasks(condDict={'TransformationID': transID})
    if not res['OK']:
      self.log.error("Failed to get externalIDs for transformation %d" % transID, res['Message'])
      return res
    externalIDs = [taskDict['ExternalID'] for taskDict in res["Value"]]
    self.log.info("Found %d tasks for transformation" % len(externalIDs))
    return S_OK(externalIDs)

  def __removeRequests(self, requestIDs):
    """ This will remove requests from the RMS system -
    """
    rIDs = [int(long(j)) for j in requestIDs if long(j)]
    for reqID in rIDs:
      self.reqClient.cancelRequest(reqID)

    return S_OK()

  def __removeWMSTasks(self, transJobIDs):
    """ wipe out jobs and their requests from the system

    :param self: self reference
    :param list trasnJobIDs: job IDs
    """
    # Prevent 0 job IDs
    jobIDs = [int(j) for j in transJobIDs if int(j)]
    allRemove = True
    for jobList in breakListIntoChunks(jobIDs, 500):

      res = self.wmsClient.killJob(jobList)
      if res['OK']:
        self.log.info("Successfully killed %d jobs from WMS" % len(jobList))
      elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs" not in res) and ("FailedJobIDs" not in res):
        self.log.info("Found %s jobs which did not exist in the WMS" % len(res['InvalidJobIDs']))
      elif "NonauthorizedJobIDs" in res:
        self.log.error("Failed to kill %s jobs because not authorized" % len(res['NonauthorizedJobIDs']))
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error("Failed to kill %s jobs" % len(res['FailedJobIDs']))
        allRemove = False

      res = self.wmsClient.deleteJob(jobList)
      if res['OK']:
        self.log.info("Successfully removed %d jobs from WMS" % len(jobList))
      elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs" not in res) and ("FailedJobIDs" not in res):
        self.log.info("Found %s jobs which did not exist in the WMS" % len(res['InvalidJobIDs']))
      elif "NonauthorizedJobIDs" in res:
        self.log.error("Failed to remove %s jobs because not authorized" % len(res['NonauthorizedJobIDs']))
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error("Failed to remove %s jobs" % len(res['FailedJobIDs']))
        allRemove = False

    if not allRemove:
      return S_ERROR("Failed to remove all remnants from WMS")
    self.log.info("Successfully removed all tasks from the WMS")

    if not jobIDs:
      self.log.info("JobIDs not present, unable to remove asociated requests.")
      return S_OK()

    failed = 0
    failoverRequests = {}
    res = self.reqClient.getRequestIDsForJobs(jobIDs)
    if not res['OK']:
      self.log.error("Failed to get requestID for jobs.", res['Message'])
      return res
    failoverRequests.update(res['Value']['Successful'])
    if not failoverRequests:
      return S_OK()
    for jobID, requestID in res['Value']['Successful'].items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.reqClient.cancelRequest(requestID)
      if not res['OK']:
        self.log.error("Failed to remove request from RequestDB", res['Message'])
        failed += 1
      else:
        self.log.verbose("Removed request %s associated to job %d." % (requestID, jobID))

    if failed:
      self.log.info("Successfully removed %s requests" % (len(failoverRequests) - failed))
      self.log.info("Failed to remove %s requests" % failed)
      return S_ERROR("Failed to remove all the request from RequestDB")
    self.log.info("Successfully removed all the associated failover requests")
    return S_OK()
t.setTransformationName(transferName) # Must be unique
t.setTransformationGroup("Transfer")
t.setType("Transfer-JUNO")
#t.setPlugin("Standard") # Not needed. The default is 'Standard'
t.setDescription("Test Data Transfer")
t.setLongDescription( "Long description of Data Transfer" ) # Mandatory
t.setGroupSize(3) # Here you specify how many files should be grouped within he same request, e.g. 100

transBody = [ ( "ReplicateAndRegister", { "SourceSE": fromSE, "TargetSE": toSE }) ]

t.setBody ( transBody ) # Mandatory

result = t.addTransformation() # Transformation is created here
if not result['OK']:
    gLogger.error('Can not add transformation: %s' % result['Message'])
    exit(2)

t.setStatus("Active")
t.setAgentType("Automatic")
transID = t.getTransformationID()

result = tc.addFilesToTransformation(transID['Value'], infileList) # Files are added here
if not result['OK']:
    gLogger.error('Can not add files to transformation: %s' % result['Message'])
    exit(2)

result = tc.setTransformationParameter( transID['Value'], 'Status', 'Flush' )
if not result['OK']:
    gLogger.error('Can not flush transformation: %s' % result['Message'])
    exit(2)
Exemplo n.º 10
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """ c'tor
    """
        super(Transformation, self).__init__()

        self.paramTypes = {
            'TransformationID': [types.IntType, types.LongType],
            'TransformationName': types.StringTypes,
            'Status': types.StringTypes,
            'Description': types.StringTypes,
            'LongDescription': types.StringTypes,
            'Type': types.StringTypes,
            'Plugin': types.StringTypes,
            'AgentType': types.StringTypes,
            'FileMask': types.StringTypes,
            'TransformationGroup': types.StringTypes,
            'GroupSize': [types.IntType, types.LongType, types.FloatType],
            'InheritedFrom': [types.IntType, types.LongType],
            'Body': types.StringTypes,
            'MaxNumberOfTasks': [types.IntType, types.LongType],
            'EventsPerTask': [types.IntType, types.LongType]
        }
        self.paramValues = {
            'TransformationID': 0,
            'TransformationName': '',
            'Status': 'New',
            'Description': '',
            'LongDescription': '',
            'Type': '',
            'Plugin': 'Standard',
            'AgentType': 'Manual',
            'FileMask': '',
            'TransformationGroup': 'General',
            'GroupSize': 1,
            'InheritedFrom': 0,
            'Body': '',
            'MaxNumberOfTasks': 0,
            'EventsPerTask': 0
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            'Transformations/AllowedPlugins',
            ['Broadcast', 'Standard', 'BySize', 'ByShare'])
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues['TransformationID'] = transID
            res = self.getTransformation()
            if res['OK']:
                self.exists = True
            elif res['Message'] == 'Transformation does not exist':
                raise AttributeError('TransformationID %d does not exist' %
                                     transID)
            else:
                self.paramValues['TransformationID'] = 0
                gLogger.fatal(
                    "Failed to get transformation from database",
                    "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        return self.serverURL

    def reset(self, transID=0):
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        return self.__setSE('TargetSE', seList)

    def setSourceSE(self, seList):
        return self.__setSE('SourceSE', seList)

    def __setSE(self, seParam, seList):
        if isinstance(seList, basestring):
            try:
                seList = eval(seList)
            except:
                seList = seList.split(',')
        elif isinstance(seList, (list, dict, tuple)):
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        if name.find('get') == 0:
            item = name[3:]
            self.item_called = item
            return self.__getParam
        if name.find('set') == 0:
            item = name[3:]
            self.item_called = item
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        if self.item_called == 'Available':
            return S_OK(self.paramTypes.keys())
        if self.item_called == 'Parameters':
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" %
                             self.item_called)

    def __setParam(self, value):
        change = False
        if self.item_called in self.paramTypes:
            if self.paramValues[self.item_called] != value:
                if type(value) in self.paramTypes[self.item_called]:
                    change = True
                else:
                    raise TypeError("%s %s %s expected one of %s" %
                                    (self.item_called, value, type(value),
                                     self.paramTypes[self.item_called]))
        else:
            if self.item_called not in self.paramValues:
                change = True
            else:
                if self.paramValues[self.item_called] != value:
                    change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" %
                            self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues['TransformationID']
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(
                    transID, self.item_called, value)
                if not res['OK']:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setter = None
            setterName = "set%s" % paramName
            if hasattr(self, setterName) and callable(getattr(
                    self, setterName)):
                setter = getattr(self, setterName)
            if not setterName:
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res['Value']
        if printOutput:
            self._printFormattedDictList(
                loggingList, ['Message', 'MessageDate', 'AuthorDN'],
                'MessageDate', 'MessageDate')
        return S_OK(loggingList)

    def extendTransformation(self, nTasks, printOutput=False):
        return self.__executeOperation('extendTransformation',
                                       nTasks,
                                       printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        res = self.__executeOperation('cleanTransformation',
                                      printOutput=printOutput)
        if res['OK']:
            self.paramValues['Status'] = 'Cleaned'
        return res

    def deleteTransformation(self, printOutput=False):
        res = self.__executeOperation('deleteTransformation',
                                      printOutput=printOutput)
        if res['OK']:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        return self.__executeOperation('addFilesToTransformation',
                                       lfns,
                                       printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        return self.__executeOperation('setFileStatusForTransformation',
                                       status,
                                       lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        return self.__executeOperation('getTransformationTaskStats',
                                       printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        return self.__executeOperation('getTransformationStats',
                                       printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        return self.__executeOperation('deleteTasks',
                                       taskMin,
                                       taskMax,
                                       printOutput=printOutput)

    def addTaskForTransformation(self,
                                 lfns=[],
                                 se='Unknown',
                                 printOutput=False):
        return self.__executeOperation('addTaskForTransformation',
                                       lfns,
                                       se,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        return self.__executeOperation('setTaskStatus',
                                       taskID,
                                       status,
                                       printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        printOutput = kwds.pop('printOutput')
        fcn = None
        if hasattr(self.transClient, operation) and callable(
                getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR(
                "Unable to invoke %s, it isn't a member funtion of TransformationClient"
            )
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self,
                               fileStatus=[],
                               lfns=[],
                               outputFields=[
                                   'FileID', 'LFN', 'Status', 'TaskID',
                                   'TargetSE', 'UsedSE', 'ErrorCount',
                                   'InsertedTime', 'LastUpdate'
                               ],
                               orderBy='FileID',
                               printOutput=False):
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'FileID', orderBy)
        return res

    def getTransformationTasks(self,
                               taskStatus=[],
                               taskIDs=[],
                               outputFields=[
                                   'TransformationID', 'TaskID',
                                   'ExternalStatus', 'ExternalID', 'TargetSE',
                                   'CreationTime', 'LastUpdateTime'
                               ],
                               orderBy='TaskID',
                               printOutput=False):
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self,
                           transID=[],
                           transStatus=[],
                           outputFields=[
                               'TransformationID', 'Status', 'AgentType',
                               'TransformationName', 'CreationDate'
                           ],
                           orderBy='TransformationID',
                           printOutput=False):
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """ gets the AuthorDN and username of the transformation from the uploaded proxy
    """
        username = ""
        author = ""
        res = getProxyInfo()
        if res['OK']:
            author = res['Value']['identity']
            username = res['Value']['username']
        else:
            gLogger.error("Unable to get uploaded proxy Info %s " %
                          res['Message'])
            return S_ERROR(res['Message'])

        res = {'username': username, 'authorDN': author}
        return S_OK(res)

    #############################################################################
    def getTransformationsByUser(self,
                                 authorDN="",
                                 userName="",
                                 transID=[],
                                 transStatus=[],
                                 outputFields=[
                                     'TransformationID', 'Status', 'AgentType',
                                     'TransformationName', 'CreationDate',
                                     'AuthorDN'
                                 ],
                                 orderBy='TransformationID',
                                 printOutput=False):
        condDict = {}
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res['OK']:
                gLogger.error(res['Message'])
                return S_ERROR(res['Message'])
            else:
                foundUserName = res['Value']['username']
                foundAuthor = res['Value']['authorDN']
                # If the username whom created the uploaded proxy is different than the provided username report error and exit
                if not (userName == "" or userName == foundUserName):
                    gLogger.error(
                        "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                        % (userName, foundUserName))
                    return S_ERROR(
                        "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                        % (userName, foundUserName))

                userName = foundUserName
                authorDN = foundAuthor
                gLogger.info(
                    "Will list transformations created by user '%s' with status '%s'"
                    % (userName, ', '.join(transStatus)))
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" %
                (authorDN, ', '.join(transStatus)))

        condDict['AuthorDN'] = authorDN
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res

        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Show the summary for a list of Transformations

       Fields starting with 'F' ('J')  refers to files (jobs).
       Proc. stand for processed.
    """
        condDict = {'TransformationID': transID}
        orderby = []
        start = 0
        maxitems = len(transID)
        paramShowNames = ['TransformationID','Type','Status','Files_Total','Files_PercentProcessed',\
                          'Files_Processed','Files_Unused','Jobs_TotalCreated','Jobs_Waiting',\
                          'Jobs_Running','Jobs_Done','Jobs_Failed','Jobs_Stalled']
        # Below, the header used for each field in the printing: short to fit in one line
        paramShowNamesShort = ['TransID','Type','Status','F_Total','F_Proc.(%)','F_Proc.',\
                               'F_Unused','J_Created','J_Wait','J_Run','J_Done','J_Fail','J_Stalled']
        dictList = []

        result = self.transClient.getTransformationSummaryWeb(
            condDict, orderby, start, maxitems)
        if not result['OK']:
            self._prettyPrint(result)
            return result

        if result['Value']['TotalRecords'] > 0:
            try:
                paramNames = result['Value']['ParameterNames']
                for paramValues in result['Value']['Records']:
                    paramShowValues = map(
                        lambda pname: paramValues[paramNames.index(pname)],
                        paramShowNames)
                    showDict = dict(zip(paramShowNamesShort, paramShowValues))
                    dictList.append(showDict)

            except Exception, x:
                print 'Exception %s ' % str(x)

        if not len(dictList) > 0:
            gLogger.error(
                'No found transformations satisfying input condition')
            return S_ERROR(
                'No found transformations satisfying input condition')
        else:
            print self._printFormattedDictList(dictList, paramShowNamesShort,
                                               paramShowNamesShort[0],
                                               paramShowNamesShort[0])

        return S_OK(dictList)
Exemplo n.º 11
0
class TransformationAgent(AgentModule):
    def initialize(self):
        """ standard init
    """
        self.pluginLocation = self.am_getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.checkCatalog = self.am_getOption('CheckCatalog', 'yes')
        self.transformationStatus = self.am_getOption(
            'transformationStatus', ['Active', 'Completing', 'Flush'])
        self.maxFiles = self.am_getOption('MaxFiles', 5000)

        self.am_setOption('shifterProxy', 'ProductionManager')

        self.transDB = TransformationClient('TransformationDB')
        self.rm = ReplicaManager()
        self.unusedFiles = {}
        return S_OK()

    def execute(self):
        """ get and process the transformations to be processed
    """
        res = self.getTransformations()
        if not res['OK']:
            gLogger.info("execute: Failed to obtain transformations: %s" %
                         res['Message'])
            return S_OK()
        # Process the transformations
        for transDict in res['Value']:
            transID = long(transDict['TransformationID'])
            gLogger.info("execute: Processing transformation %s." % transID)
            startTime = time.time()
            res = self.processTransformation(transDict)
            if not res['OK']:
                gLogger.info("execute: Failed to process transformation: %s" %
                             res['Message'])
            else:
                gLogger.info(
                    "execute: Processed transformation in %.1f seconds" %
                    (time.time() - startTime))
        return S_OK()

    def getTransformations(self):
        """ Obtain the transformations to be executed 
    """
        transName = self.am_getOption('Transformation', 'All')
        if transName == 'All':
            gLogger.info(
                "getTransformations: Initializing general purpose agent.")
            res = self.transDB.getTransformations(
                {'Status': self.transformationStatus}, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "getTransformations: Failed to get transformations: %s" %
                    res['Message'])
                return res
            transformations = res['Value']
            gLogger.info(
                "getTransformations: Obtained %d transformations to process" %
                len(transformations))
        else:
            gLogger.info(
                "getTransformations: Initializing for transformation %s." %
                transName)
            res = self.transDB.getTransformation(transName, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "getTransformations: Failed to get transformation: %s." %
                    res['Message'])
                return res
            transformations = [res['Value']]
        return S_OK(transformations)

    def processTransformation(self, transDict):
        transID = transDict['TransformationID']
        # First get the LFNs associated to the transformation
        res = self.transDB.getTransformationFiles(condDict={
            'TransformationID': transID,
            'Status': 'Unused'
        })
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to obtain input data: %s." %
                res['Message'])
            return res
        transFiles = res['Value']
        lfns = res['LFNs']

        if not lfns:
            gLogger.info(
                "processTransformation: No 'Unused' files found for transformation."
            )
            if transDict['Status'] == 'Flush':
                res = self.transDB.setTransformationParameter(
                    transID, 'Status', 'Active')
                if not res['OK']:
                    gLogger.error(
                        "processTransformation: Failed to update transformation status to 'Active': %s."
                        % res['Message'])
                else:
                    gLogger.info(
                        "processTransformation: Updated transformation status to 'Active'."
                    )
            return S_OK()
        #Check if something new happened
        if len(lfns) == self.unusedFiles.get(
                transID, 0) and transDict['Status'] != 'Flush':
            gLogger.info(
                "processTransformation: No new 'Unused' files found for transformation."
            )
            return S_OK()

        replicateOrRemove = transDict['Type'].lower() in [
            "replication", "removal"
        ]
        # Limit the number of LFNs to be considered for replication or removal as they are treated individually
        if replicateOrRemove:
            lfns = lfns[0:self.maxFiles - 1]
        unusedFiles = len(lfns)
        # Check the data is available with replicas
        res = self.__getDataReplicas(transID,
                                     lfns,
                                     active=not replicateOrRemove)
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to get data replicas: %s" %
                res['Message'])
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        plugin = 'Standard'
        if transDict.has_key('Plugin') and transDict['Plugin']:
            plugin = transDict['Plugin']
        gLogger.info(
            "processTransformation: Processing transformation with '%s' plug-in."
            % plugin)
        res = self.__generatePluginObject(plugin)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.generateTasks()
        if not res['OK']:
            gLogger.error(
                "processTransformation: Failed to generate tasks for transformation: %s"
                % res['Message'])
            return res
        tasks = res['Value']
        # Create the tasks
        allCreated = True
        created = 0
        for se, lfns in tasks:
            res = self.transDB.addTaskForTransformation(transID, lfns, se)
            if not res['OK']:
                gLogger.error(
                    "processTransformation: Failed to add task generated by plug-in: %s."
                    % res['Message'])
                allCreated = False
            else:
                created += 1
                unusedFiles -= len(lfns)
        if created:
            gLogger.info(
                "processTransformation: Successfully created %d tasks for transformation."
                % created)
        self.unusedFiles[transID] = unusedFiles

        # If this production is to Flush
        if transDict['Status'] == 'Flush' and allCreated:
            res = self.transDB.setTransformationParameter(
                transID, 'Status', 'Active')
            if not res['OK']:
                gLogger.error(
                    "processTransformation: Failed to update transformation status to 'Active': %s."
                    % res['Message'])
            else:
                gLogger.info(
                    "processTransformation: Updated transformation status to 'Active'."
                )
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def __generatePluginObject(self, plugin):
        """ This simply instantiates the TransformationPlugin class with the relevant plugin name
    """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(),
                                    ['TransformationPlugin'])
        except ImportError, e:
            gLogger.exception(
                "__generatePluginObject: Failed to import 'TransformationPlugin' %s: %s"
                % (plugin, e))
            return S_ERROR()
        try:
            plugin_o = getattr(plugModule, 'TransformationPlugin')(
                '%s' % plugin,
                transClient=self.transDB,
                replicaManager=self.rm)
            return S_OK(plugin_o)
        except AttributeError, e:
            gLogger.exception(
                "__generatePluginObject: Failed to create %s(): %s." %
                (plugin, e))
            return S_ERROR()
Exemplo n.º 12
0
class Transformation( API ):

  #############################################################################
  def __init__( self, transID = 0, transClient = None ):
    """ c'tor
    """
    super( Transformation, self ).__init__()

    self.paramTypes = { 'TransformationID'      : [types.IntType, types.LongType],
                          'TransformationName'    : types.StringTypes,
                          'Status'                : types.StringTypes,
                          'Description'           : types.StringTypes,
                          'LongDescription'       : types.StringTypes,
                          'Type'                  : types.StringTypes,
                          'Plugin'                : types.StringTypes,
                          'AgentType'             : types.StringTypes,
                          'FileMask'              : types.StringTypes,
                          'TransformationGroup'   : types.StringTypes,
                          'GroupSize'             : [types.IntType, types.LongType, types.FloatType],
                          'InheritedFrom'         : [types.IntType, types.LongType],
                          'Body'                  : types.StringTypes,
                          'MaxNumberOfTasks'      : [types.IntType, types.LongType],
                          'EventsPerTask'         : [types.IntType, types.LongType]}
    self.paramValues = { 'TransformationID'      : 0,
                          'TransformationName'    : '',
                          'Status'                : 'New',
                          'Description'           : '',
                          'LongDescription'       : '',
                          'Type'                  : '',
                          'Plugin'                : 'Standard',
                          'AgentType'             : 'Manual',
                          'FileMask'              : '',
                          'TransformationGroup'   : 'General',
                          'GroupSize'             : 1,
                          'InheritedFrom'         : 0,
                          'Body'                  : '',
                          'MaxNumberOfTasks'       : 0,
                          'EventsPerTask'          : 0}
    self.ops = Operations()
    self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins',
                                              ['Broadcast', 'Standard', 'BySize', 'ByShare'] )
    if not transClient:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient
    self.serverURL = self.transClient.getServer()
    self.exists = False
    if transID:
      self.paramValues['TransformationID'] = transID
      res = self.getTransformation()
      if res['OK']:
        self.exists = True
      elif res['Message'] == 'Transformation does not exist':
        raise AttributeError( 'TransformationID %d does not exist' % transID )
      else:
        self.paramValues['TransformationID'] = 0
        gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID,
                                                                                   self.transClient.serverURL ) )

  def setServer( self, server ):
    self.serverURL = server
    self.transClient.setServer( self.serverURL )

  def getServer( self ):
    return self.serverURL

  def reset( self, transID = 0 ):
    self.__init__( transID )
    self.transClient.setServer( self.serverURL )
    return S_OK()

  def setTargetSE( self, seList ):
    return self.__setSE( 'TargetSE', seList )

  def setSourceSE( self, seList ):
    return self.__setSE( 'SourceSE', seList )

  def __setSE( self, seParam, seList ):
    if isinstance( seList, basestring ):
      try:
        seList = eval( seList )
      except:
        seList = seList.split( ',' )
    elif isinstance( seList, ( list, dict, tuple ) ):
      seList = list( seList )
    else:
      return S_ERROR( "Bad argument type" )
    res = self.__checkSEs( seList )
    if not res['OK']:
      return res
    self.item_called = seParam
    return self.__setParam( seList )

  def __getattr__( self, name ):
    if name.find( 'get' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__getParam
    if name.find( 'set' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__setParam
    raise AttributeError( name )

  def __getParam( self ):
    if self.item_called == 'Available':
      return S_OK( self.paramTypes.keys() )
    if self.item_called == 'Parameters':
      return S_OK( self.paramValues )
    if self.item_called in self.paramValues:
      return S_OK( self.paramValues[self.item_called] )
    raise AttributeError( "Unknown parameter for transformation: %s" % self.item_called )

  def __setParam( self, value ):
    change = False
    if self.item_called in self.paramTypes:
      if self.paramValues[self.item_called] != value:
        if type( value ) in self.paramTypes[self.item_called]:
          change = True
        else:
          raise TypeError( "%s %s %s expected one of %s" % ( self.item_called, value, type( value ),
                                                             self.paramTypes[self.item_called] ) )
    else:
      if self.item_called not in self.paramValues:
        change = True
      else:
        if self.paramValues[self.item_called] != value:
          change = True
    if not change:
      gLogger.verbose( "No change of parameter %s required" % self.item_called )
    else:
      gLogger.verbose( "Parameter %s to be changed" % self.item_called )
      transID = self.paramValues['TransformationID']
      if self.exists and transID:
        res = self.transClient.setTransformationParameter( transID, self.item_called, value )
        if not res['OK']:
          return res
      self.paramValues[self.item_called] = value
    return S_OK()

  def getTransformation( self, printOutput = False ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformation( transID, extraParams = True )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setter = None
      setterName = "set%s" % paramName
      if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
        setter = getattr( self, setterName )
      if not setterName:
        gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName )
        continue
      setter( paramValue )
    if printOutput:
      gLogger.info( "No printing available yet" )
    return S_OK( transParams )

  def getTransformationLogging( self, printOutput = False ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformationLogging( transID )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    loggingList = res['Value']
    if printOutput:
      self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' )
    return S_OK( loggingList )

  def extendTransformation( self, nTasks, printOutput = False ):
    return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput )

  def cleanTransformation( self, printOutput = False ):
    res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput )
    if res['OK']:
      self.paramValues['Status'] = 'Cleaned'
    return res

  def deleteTransformation( self, printOutput = False ):
    res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput )
    if res['OK']:
      self.reset()
    return res

  def addFilesToTransformation( self, lfns, printOutput = False ):
    return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput )

  def setFileStatusForTransformation( self, status, lfns, printOutput = False ):
    return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput )

  def getTransformationTaskStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput )

  def getTransformationStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationStats', printOutput = printOutput )

  def deleteTasks( self, taskMin, taskMax, printOutput = False ):
    return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput )

  def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ):
    return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput )

  def setTaskStatus( self, taskID, status, printOutput = False ):
    return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput )

  def __executeOperation( self, operation, *parms, **kwds ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    printOutput = kwds.pop( 'printOutput' )
    fcn = None
    if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ):
      fcn = getattr( self.transClient, operation )
    if not fcn:
      return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" )
    res = fcn( transID, *parms, **kwds )
    if printOutput:
      self._prettyPrint( res )
    return res

  def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID',
                                                                                'TargetSE', 'UsedSE', 'ErrorCount',
                                                                                'InsertedTime', 'LastUpdate'],
                             orderBy = 'FileID', printOutput = False ):
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy )
    return res

  def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID',
                                                                                   'ExternalStatus', 'ExternalID',
                                                                                   'TargetSE', 'CreationTime',
                                                                                   'LastUpdateTime'],
                             orderBy = 'TaskID', printOutput = False ):
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy )
    return res

  #############################################################################
  def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status',
                                                                                'AgentType', 'TransformationName',
                                                                                'CreationDate'],
                         orderBy = 'TransformationID', printOutput = False ):
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getAuthorDNfromProxy( self ):
    """ gets the AuthorDN and username of the transformation from the uploaded proxy
    """
    username = ""
    author   = ""
    res = getProxyInfo()
    if res['OK']:
      author   = res['Value']['identity']
      username = res['Value']['username']
    else:
      gLogger.error( "Unable to get uploaded proxy Info %s " %res['Message'] )
      return S_ERROR( res['Message'] )

    res = {'username' : username, 'authorDN' : author }
    return S_OK( res )

  #############################################################################
  def getTransformationsByUser( self, authorDN = "", userName = "", transID = [], transStatus = [], outputFields = ['TransformationID', 'Status',
                                                                                                     'AgentType', 'TransformationName',
                                                                                                     'CreationDate', 'AuthorDN'],
                                orderBy = 'TransformationID', printOutput = False ):
    condDict = {}
    if authorDN == "":
      res = self.getAuthorDNfromProxy()
      if not res['OK']:
        gLogger.error( res['Message'] )
        return S_ERROR( res['Message'] )
      else:
        foundUserName = res['Value']['username']
        foundAuthor   = res['Value']['authorDN']
        # If the username whom created the uploaded proxy is different than the provided username report error and exit
        if not ( userName == ""  or userName == foundUserName ):
          gLogger.error("Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" %(userName, foundUserName))
          return S_ERROR("Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" %(userName, foundUserName))

        userName = foundUserName
        authorDN = foundAuthor
        gLogger.info("Will list transformations created by user '%s' with status '%s'" %(userName, ', '.join( transStatus )))
    else:
      gLogger.info("Will list transformations created by '%s' with status '%s'" %(authorDN, ', '.join( transStatus )))

    condDict['AuthorDN'] = authorDN
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res

    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getSummaryTransformations( self , transID = []):
    """Show the summary for a list of Transformations

       Fields starting with 'F' ('J')  refers to files (jobs).
       Proc. stand for processed.
    """
    condDict = { 'TransformationID' : transID }
    orderby = []
    start = 0
    maxitems = len(transID)
    paramShowNames = ['TransformationID','Type','Status','Files_Total','Files_PercentProcessed',\
                      'Files_Processed','Files_Unused','Jobs_TotalCreated','Jobs_Waiting',\
                      'Jobs_Running','Jobs_Done','Jobs_Failed','Jobs_Stalled']
    # Below, the header used for each field in the printing: short to fit in one line
    paramShowNamesShort = ['TransID','Type','Status','F_Total','F_Proc.(%)','F_Proc.',\
                           'F_Unused','J_Created','J_Wait','J_Run','J_Done','J_Fail','J_Stalled']
    dictList = []

    result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems )
    if not result['OK']:
      self._prettyPrint( result )
      return result

    if result['Value']['TotalRecords'] > 0:
      try:
        paramNames = result['Value']['ParameterNames']
        for paramValues in result['Value']['Records']:
          paramShowValues = map(lambda pname: paramValues[ paramNames.index(pname) ], paramShowNames)
          showDict = dict(zip( paramShowNamesShort, paramShowValues ))
          dictList.append( showDict )

      except Exception, x:
        print 'Exception %s ' %str(x)

    if not len(dictList) > 0:
      gLogger.error( 'No found transformations satisfying input condition')
      return S_ERROR( 'No found transformations satisfying input condition')
    else:
      print self._printFormattedDictList( dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0] )

    return S_OK( dictList )
Exemplo n.º 13
0
class TransformationCleaningAgent( AgentModule ):
  """
  .. class:: TransformationCleaningAgent

  :param DataManger dm: DataManager instance
  :param TransfromationClient transClient: TransfromationClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  """

  def __init__( self, *args, **kwargs ):
    """ c'tor
    """
    AgentModule.__init__( self, *args, **kwargs )

    # # data manager
    self.dm = None
    # # transformation client
    self.transClient = None
    # # wms client
    self.wmsClient = None
    # # request client
    self.reqClient = None
    # # file catalog client
    self.metadataClient = None

    # # transformations types
    self.transformationTypes = None
    # # directory locations
    self.directoryLocations = None
    # # transformation metadata
    self.transfidmeta = None
    # # archive periof in days
    self.archiveAfter = None
    # # active SEs
    self.activeStorages = None
    # # transformation log SEs
    self.logSE = None
    # # enable/disable execution
    self.enableFlag = None

  def initialize( self ):
    """ agent initialisation

    reading and setting confing opts

    :param self: self reference
    """
    # # shifter proxy
    self.am_setOption( 'shifterProxy', 'DataManager' )
    # # transformations types
    self.dataProcTTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
    self.dataManipTTypes = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = sorted( agentTSTypes )
    else:
      self.transformationTypes = sorted( self.dataProcTTypes + self.dataManipTTypes )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )
    # # directory locations
    self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB',
                                                                                   'MetadataCatalog' ] ) )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    # # transformation metadata
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    # # archive periof in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )  # days
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )
    # # active SEs
    self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    # # transformation log SEs
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )
    # # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )

    # # data manager
#     self.dm = DataManager()
    # # transformation client
    self.transClient = TransformationClient()
    # # wms client
    self.wmsClient = WMSClient()
    # # request client
    self.reqClient = ReqClient()
    # # file catalog client
    self.metadataClient = FileCatalogClient()

    return S_OK()

  #############################################################################
  def execute( self ):
    """ execution in one agent's cycle

    :param self: self reference
    """

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if not self.enableFlag == 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        # # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # # We just archive
        if transDict[ 'Type' ] in self.dataManipTTypes:
          res = self.archiveTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                         res['Message'] ) )
        else:
          res = self.cleanTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'],
                                                                        res['Message'] ) )


    # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        res = self.removeTransformationOutput( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )

    # # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                                 older = olderThanTime,
                                                 timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        res = self.archiveTransformation( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )
    else:
      self.log.error( "Could not get the transformations" )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ get the directories for the supplied transformation from the transformation system

    :param self: self reference
    :param int transID: transformation ID
    """
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        self.log.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      transDirectories = res['Value'].splitlines()
      directories = self._addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )

    if not directories:
      self.log.info( "No output directories found" )
    directories = sorted( directories )
    return S_OK( directories )
  # FIXME If a classmethod, should it not have cls instead of self?
  @classmethod
  def _addDirs( self, transID, newDirs, existingDirs ):
    """ append uniqe :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    """
    for folder in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, str( folder ) ):
        if not folder in existingDirs:
          existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    """ delete lfn dir from all active SE

    :param self: self reference
    :param sre directory: folder name
    """
    for storageElement in self.activeStorages:
      res = self.__removeStorageDirectory( directory, storageElement )
      if not res['OK']:
        return res
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    """ wipe out all contents from :directory: at :storageElement:

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    """
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )

    se = StorageElement( storageElement )

    res = se.getPfnForLfn( [directory] )
    if not res['OK']:
      self.log.error( "Failed to get PFN for directory", res['Message'] )
      return res
    if directory in res['Value']['Failed']:
      self.log.verbose( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, res['Value']['Failed'][directory] ) )
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = res['Value']['Successful'][directory]

    res = returnSingleResult( se.exists( storageDirectory ) )
    if not res['OK']:
      self.log.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    exists = res['Value']
    if not exists:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()
    res = returnSingleResult( se.removeDirectory( storageDirectory, recursive = True ) )
    if not res['OK']:
      self.log.error( "Failed to remove storage directory", res['Message'] )
      return res
    self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'],
                                                                     directory,
                                                                     storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    """ wipe out everything from catalog under folder :directory:

    :param self: self reference
    :params str directory: folder name
    """
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      self.log.info( "No files are registered in the catalog directory %s" % directory )
      return S_OK()
    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' )
    res = DataManager().removeFile( filesFound, force = True )
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' )

    if not res['OK']:
      return res
    realFailure = False
    for lfn, reason in res['Value']['Failed'].items():
      if "File does not exist" in str( reason ):
        self.log.warn( "File %s not found in some catalog: " % ( lfn ) )
      else:
        self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
        realFailure = True
    if realFailure:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    """ get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    """
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    activeDirs = directories
    allFiles = {}
    fc = FileCatalog()
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = returnSingleResult( fc.listDirectory( currentDir ) )
      activeDirs.remove( currentDir )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        if "No such file or directory" in res['Message']:
          self.log.info( "%s: %s" % ( currentDir, res['Message'] ) )
        else:
          self.log.error( "Failed to get directory %s content: %s" % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    self.log.info( "Found %d files" % len( allFiles ) )
    return S_OK( allFiles.keys() )

  def cleanTransformationLogFiles( self, directory ):
    """ clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    """
    self.log.info( "Removing log files found in the directory %s" % directory )
    res = returnSingleResult( StorageElement( self.logSE ).removeDirectory( directory ) )
    if not res['OK']:
      self.log.error( "Failed to remove log files", res['Message'] )
      return res
    self.log.info( "Successfully removed transformation log directory" )
    return S_OK()

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    """ This just removes any mention of the output data from the catalog and storage """
    self.log.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search( '/LOG/', directory ):
        res = self.cleanCatalogContents( directory )
        if not res['OK']:
          return res
        res = self.cleanStorageContents( directory )
        if not res['OK']:
          return res
    self.log.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully removed output of transformation %d" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    """ This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    """
    self.log.info( "Archiving transformation %s" % transID )
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully archived transformation %d" % transID )
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
    return S_OK()

  def cleanTransformation( self, transID ):
    """ This removes what was produced by the supplied transformation,
        leaving only some info and log in the transformation DB.
    """
    self.log.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      res = self.cleanCatalogContents( directory )
      if not res['OK']:
        return res
      res = self.cleanStorageContents( directory )
      if not res['OK']:
        return res
    # Clean ALL the possible remnants found in the BK
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully cleaned transformation %d" % transID )
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Cleaned' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Cleaned" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Cleaned" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID ):
    """ wipe out files from catalog """
    res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' )
    res = DataManager().removeFile( fileToRemove, force = True )
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' )

    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    """ clean tasks from WMS, or from the RMS if it is a DataManipulation transformation
    """
    res = self.__getTransformationExternalIDs( transID )
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if externalIDs:
      res = self.transClient.getTransformationParameters( transID, ['Type'] )
      if not res['OK']:
        self.log.error( "Failed to determine transformation type" )
        return res
      transType = res['Value']
      if transType in self.dataProcTTypes:
        res = self.__removeWMSTasks( externalIDs )
      else:
        res = self.__removeRequests( externalIDs )
      if not res['OK']:
        return res
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    """ collect all ExternalIDs for transformation :transID:

    :param self: self reference
    :param int transID: transforamtion ID
    """
    res = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if not res['OK']:
      self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] )
      return res
    externalIDs = [ taskDict['ExternalID'] for taskDict in res["Value"] ]
    self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    """ This will remove requests from the (new) RMS system -

        #FIXME: if the old system is still installed, it won't remove anything!!!
        (we don't want to risk removing from the new RMS what is instead in the old)
    """
    # FIXME: checking if the old system is still installed!
    from DIRAC.ConfigurationSystem.Client import PathFinder
    if PathFinder.getServiceURL( "RequestManagement/RequestManager" ):
      self.log.warn( "NOT removing requests!!" )
      return S_OK()

    rIDs = [ int( long( j ) ) for j in requestIDs if long( j ) ]
    for requestName in rIDs:
      self.reqClient.deleteRequest( requestName )

    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    """ wipe out jobs and their requests from the system

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list trasnJobIDs: job IDs
    """
    # Prevent 0 job IDs
    jobIDs = [ int( j ) for j in transJobIDs if int( j ) ]
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):

      res = self.wmsClient.killJob( jobList )
      if res['OK']:
        self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

      res = self.wmsClient.deleteJob( jobList )
      if res['OK']:
        self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    failed = 0
    # FIXME: double request client: old/new -> only the new will survive sooner or later
    # this is the old
    try:
      res = RequestClient().getRequestForJobs( jobIDs )
      if not res['OK']:
        self.log.error( "Failed to get requestID for jobs.", res['Message'] )
        return res
      failoverRequests = res['Value']
      self.log.info( "Found %d jobs with associated failover requests (in the old RMS)" % len( failoverRequests ) )
      if not failoverRequests:
        return S_OK()
      for jobID, requestName in failoverRequests.items():
        # Put this check just in case, tasks must have associated jobs
        if jobID == 0 or jobID == '0':
          continue
        res = RequestClient().deleteRequest( requestName )
        if not res['OK']:
          self.log.error( "Failed to remove request from RequestDB", res['Message'] )
          failed += 1
        else:
          self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    except RuntimeError:
      failoverRequests = {}
      pass

    # FIXME: and this is the new
    res = self.reqClient.getRequestNamesForJobs( jobIDs )
    if not res['OK']:
      self.log.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests.update( res['Value']['Successful'] )
    if not failoverRequests:
      return S_OK()
    for jobID, requestName in res['Value']['Successful'].items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.reqClient.deleteRequest( requestName )
      if not res['OK']:
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )


    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()
Exemplo n.º 14
0
class TransformationCleaningAgent(AgentModule):

    #############################################################################
    def initialize(self):
        """Sets defaults """
        self.replicaManager = ReplicaManager()
        self.transClient = TransformationClient()
        self.wmsClient = WMSClient()
        self.requestClient = RequestClient()
        self.metadataClient = FileCatalogClient()
        self.storageUsageClient = StorageUsageClient()

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        self.transformationTypes = sortList(
            self.am_getOption('TransformationTypes', [
                'MCSimulation', 'DataReconstruction', 'DataStripping',
                'MCStripping', 'Merge', 'Replication'
            ]))
        gLogger.info("Will consider the following transformation types: %s" %
                     str(self.transformationTypes))
        self.directoryLocations = sortList(
            self.am_getOption(
                'DirectoryLocations',
                ['TransformationDB', 'StorageUsage', 'MetadataCatalog']))
        gLogger.info(
            "Will search for directories in the following locations: %s" %
            str(self.directoryLocations))
        self.transfidmeta = self.am_getOption('TransfIDMeta',
                                              "TransformationID")
        gLogger.info("Will use %s as metadata tag name for TransformationID" %
                     self.transfidmeta)
        self.archiveAfter = self.am_getOption('ArchiveAfter', 7)  # days
        gLogger.info("Will archive Completed transformations after %d days" %
                     self.archiveAfter)
        self.activeStorages = sortList(self.am_getOption('ActiveSEs', []))
        gLogger.info("Will check the following storage elements: %s" %
                     str(self.activeStorages))
        self.logSE = self.am_getOption('TransformationLogSE', 'LogSE')
        gLogger.info("Will remove logs found on storage element: %s" %
                     self.logSE)
        return S_OK()

    #############################################################################
    def execute(self):
        """ The TransformationCleaningAgent execution method.
    """
        self.enableFlag = self.am_getOption('EnableFlag', 'True')
        if not self.enableFlag == 'True':
            self.log.info(
                'TransformationCleaningAgent is disabled by configuration option %s/EnableFlag'
                % (self.section))
            return S_OK('Disabled via CS flag')

        # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
        res = self.transClient.getTransformations({
            'Status':
            'Cleaning',
            'Type':
            self.transformationTypes
        })
        if res['OK']:
            for transDict in res['Value']:
                self.cleanTransformation(transDict['TransformationID'])

        # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
        res = self.transClient.getTransformations({
            'Status':
            'RemovingFiles',
            'Type':
            self.transformationTypes
        })
        if res['OK']:
            for transDict in res['Value']:
                self.removeTransformationOutput(transDict['TransformationID'])

        # Obtain the transformations in Completed status and archive if inactive for X days
        olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
        res = self.transClient.getTransformations(
            {
                'Status': 'Completed',
                'Type': self.transformationTypes
            },
            older=olderThanTime)
        if res['OK']:
            for transDict in res['Value']:
                self.archiveTransformation(transDict['TransformationID'])

        return S_OK()

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """ Get the directories for the supplied transformation from the transformation system """
        directories = []
        if 'TransformationDB' in self.directoryLocations:
            res = self.transClient.getTransformationParameters(
                transID, ['OutputDirectories'])
            if not res['OK']:
                gLogger.error("Failed to obtain transformation directories",
                              res['Message'])
                return res
            transDirectories = res['Value'].splitlines()
            directories = self.__addDirs(transID, transDirectories,
                                         directories)

        if 'StorageUsage' in self.directoryLocations:
            res = self.storageUsageClient.getStorageDirectories(
                '', '', transID, [])
            if not res['OK']:
                gLogger.error("Failed to obtain storage usage directories",
                              res['Message'])
                return res
            transDirectories = res['Value']
            directories = self.__addDirs(transID, transDirectories,
                                         directories)

        if 'MetadataCatalog' in self.directoryLocations:
            res = self.metadataClient.findDirectoriesByMetadata(
                {self.transfidmeta: transID})
            if not res['OK']:
                gLogger.error("Failed to obtain metadata catalog directories",
                              res['Message'])
                return res
            transDirectories = res['Value']
            directories = self.__addDirs(transID, transDirectories,
                                         directories)
        if not directories:
            gLogger.info("No output directories found")
        directories = sortList(directories)
        return S_OK(directories)

    def __addDirs(self, transID, newDirs, existingDirs):
        for dir in newDirs:
            transStr = str(transID).zfill(8)
            if re.search(transStr, dir):
                if not dir in existingDirs:
                    existingDirs.append(dir)
        return existingDirs

    #############################################################################
    #
    # These are the methods for performing the cleaning of catalogs and storage
    #

    def cleanStorageContents(self, directory):
        for storageElement in self.activeStorages:
            res = self.__removeStorageDirectory(directory, storageElement)
            if not res['OK']:
                return res
        return S_OK()

    def __removeStorageDirectory(self, directory, storageElement):
        gLogger.info('Removing the contents of %s at %s' %
                     (directory, storageElement))
        res = self.replicaManager.getPfnForLfn([directory], storageElement)
        if not res['OK']:
            gLogger.error("Failed to get PFN for directory", res['Message'])
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to obtain directory PFN from LFN',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to obtain directory PFN from LFNs')
        storageDirectory = res['Value']['Successful'].values()[0]
        res = self.replicaManager.getStorageFileExists(storageDirectory,
                                                       storageElement,
                                                       singleFile=True)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of directory",
                          res['Message'])
            return res
        exists = res['Value']
        if not exists:
            gLogger.info("The directory %s does not exist at %s " %
                         (directory, storageElement))
            return S_OK()
        res = self.replicaManager.removeStorageDirectory(storageDirectory,
                                                         storageElement,
                                                         recursive=True,
                                                         singleDirectory=True)
        if not res['OK']:
            gLogger.error("Failed to remove storage directory", res['Message'])
            return res
        gLogger.info("Successfully removed %d files from %s at %s" %
                     (res['Value']['FilesRemoved'], directory, storageElement))
        return S_OK()

    def cleanCatalogContents(self, directory):
        res = self.__getCatalogDirectoryContents([directory])
        if not res['OK']:
            return res
        filesFound = res['Value']
        if not filesFound:
            return S_OK()
        gLogger.info(
            "Attempting to remove %d possible remnants from the catalog and storage"
            % len(filesFound))
        res = self.replicaManager.removeFile(filesFound)
        if not res['OK']:
            return res
        for lfn, reason in res['Value']['Failed'].items():
            gLogger.error("Failed to remove file found in the catalog",
                          "%s %s" % (lfn, reason))
        if res['Value']['Failed']:
            return S_ERROR("Failed to remove all files found in the catalog")
        return S_OK()

    def __getCatalogDirectoryContents(self, directories):
        gLogger.info('Obtaining the catalog contents for %d directories:' %
                     len(directories))
        for directory in directories:
            gLogger.info(directory)
        activeDirs = directories
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = self.replicaManager.getCatalogListDirectory(currentDir,
                                                              singleFile=True)
            activeDirs.remove(currentDir)
            if not res['OK'] and res['Message'].endswith(
                    'The supplied path does not exist'):
                gLogger.info("The supplied directory %s does not exist" %
                             currentDir)
            elif not res['OK']:
                gLogger.error('Failed to get directory contents',
                              '%s %s' % (currentDir, res['Message']))
            else:
                dirContents = res['Value']
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])
        gLogger.info("Found %d files" % len(allFiles))
        return S_OK(allFiles.keys())

    def cleanTransformationLogFiles(self, directory):
        gLogger.info("Removing log files found in the directory %s" %
                     directory)
        res = self.replicaManager.removeStorageDirectory(directory,
                                                         self.logSE,
                                                         singleDirectory=True)
        if not res['OK']:
            gLogger.error("Failed to remove log files", res['Message'])
            return res
        gLogger.info("Successfully removed transformation log directory")
        return S_OK()

    #############################################################################
    #
    # These are the functional methods for archiving and cleaning transformations
    #

    def removeTransformationOutput(self, transID):
        """ This just removes any mention of the output data from the catalog and storage """
        gLogger.info("Removing output data for transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            gLogger.error(
                'Problem obtaining directories for transformation %s with result "%s"'
                % (transID, res))
            return S_OK()
        directories = res['Value']
        for directory in directories:
            if not re.search('/LOG/', directory):
                res = self.cleanCatalogContents(directory)
                if not res['OK']:
                    return res
                res = self.cleanStorageContents(directory)
                if not res['OK']:
                    return res
        gLogger.info(
            "Removed directories in the catalog and storage for transformation"
        )
        # Clean ALL the possible remnants found in the metadata catalog
        res = self.cleanMetadataCatalogFiles(transID, directories)
        if not res['OK']:
            return res
        gLogger.info("Successfully removed output of transformation %d" %
                     transID)
        # Change the status of the transformation to RemovedFiles
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'RemovedFiles')
        if not res['OK']:
            gLogger.error(
                "Failed to update status of transformation %s to RemovedFiles"
                % (transID), res['Message'])
            return res
        gLogger.info("Updated status of transformation %s to RemovedFiles" %
                     (transID))
        return S_OK()

    def archiveTransformation(self, transID):
        """ This just removes job from the jobDB and the transformation DB """
        gLogger.info("Archiving transformation %s" % transID)
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res['OK']:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res['OK']:
            return res
        gLogger.info("Successfully archived transformation %d" % transID)
        # Change the status of the transformation to archived
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'Archived')
        if not res['OK']:
            gLogger.error(
                "Failed to update status of transformation %s to Archived" %
                (transID), res['Message'])
            return res
        gLogger.info("Updated status of transformation %s to Archived" %
                     (transID))
        return S_OK()

    def cleanTransformation(self, transID):
        """ This removes any mention of the supplied transformation 
    """
        gLogger.info("Cleaning transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            gLogger.error(
                'Problem obtaining directories for transformation %s with result "%s"'
                % (transID, res))
            return S_OK()
        directories = res['Value']
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res['OK']:
            return res
        # Clean the log files for the jobs
        for directory in directories:
            if re.search('/LOG/', directory):
                res = self.cleanTransformationLogFiles(directory)
                if not res['OK']:
                    return res
            res = self.cleanCatalogContents(directory)
            if not res['OK']:
                return res
            res = self.cleanStorageContents(directory)
            if not res['OK']:
                return res
        # Clean ALL the possible remnants found in the BK
        res = self.cleanMetadataCatalogFiles(transID, directories)
        if not res['OK']:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res['OK']:
            return res
        gLogger.info("Successfully cleaned transformation %d" % transID)
        # Change the status of the transformation to deleted
        res = self.transClient.setTransformationParameter(
            transID, 'Status', 'Deleted')
        if not res['OK']:
            gLogger.error(
                "Failed to update status of transformation %s to Deleted" %
                (transID), res['Message'])
            return res
        gLogger.info("Updated status of transformation %s to Deleted" %
                     (transID))
        return S_OK()

    def cleanMetadataCatalogFiles(self, transID, directories):
        res = self.metadataClient.findFilesByMetadata(
            {self.transfidmeta: transID})
        if not res['OK']:
            return res
        fileToRemove = res['Value']
        if not len(fileToRemove):
            gLogger.info('No files found for transID %s' % transID)
            return S_OK()
        res = self.replicaManager.removeFile(fileToRemove)
        if not res['OK']:
            return res
        for lfn, reason in res['Value']['Failed'].items():
            gLogger.error("Failed to remove file found in metadata catalog",
                          "%s %s" % (lfn, reason))
        if res['Value']['Failed']:
            return S_ERROR(
                "Failed to remove all files found in the metadata catalog")
        gLogger.info("Successfully removed all files found in the BK")
        return S_OK()

    #############################################################################
    #
    # These are the methods for removing the jobs from the WMS and transformation DB
    #

    def cleanTransformationTasks(self, transID):
        res = self.__getTransformationExternalIDs(transID)
        if not res['OK']:
            return res
        externalIDs = res['Value']
        if externalIDs:
            res = self.transClient.getTransformationParameters(
                transID, ['Type'])
            if not res['OK']:
                gLogger.error("Failed to determine transformation type")
                return res
            transType = res['Value']
            if transType == 'Replication':
                res = self.__removeRequests(externalIDs)
            else:
                res = self.__removeWMSTasks(externalIDs)
            if not res['OK']:
                return res
        return S_OK()

    def __getTransformationExternalIDs(self, transID):
        res = self.transClient.getTransformationTasks(
            condDict={'TransformationID': transID})
        if not res['OK']:
            gLogger.error(
                "Failed to get externalIDs for transformation %d" % transID,
                res['Message'])
            return res
        externalIDs = []
        for taskDict in res['Value']:
            externalIDs.append(taskDict['ExternalID'])
        gLogger.info("Found %d tasks for transformation" % len(externalIDs))
        return S_OK(externalIDs)

    def __removeRequests(self, requestIDs):
        gLogger.error("Not removing requests but should do")
        return S_OK()

    def __removeWMSTasks(self, jobIDs):
        allRemove = True
        for jobList in breakListIntoChunks(jobIDs, 500):
            res = self.wmsClient.deleteJob(jobList)
            if res['OK']:
                gLogger.info("Successfully removed %d jobs from WMS" %
                             len(jobList))
            elif (res.has_key('InvalidJobIDs')) and (
                    not res.has_key('NonauthorizedJobIDs')) and (
                        not res.has_key('FailedJobIDs')):
                gLogger.info("Found %s jobs which did not exist in the WMS" %
                             len(res['InvalidJobIDs']))
            elif res.has_key('NonauthorizedJobIDs'):
                gLogger.error(
                    "Failed to remove %s jobs because not authorized" %
                    len(res['NonauthorizedJobIDs']))
                allRemove = False
            elif res.has_key('FailedJobIDs'):
                gLogger.error("Failed to remove %s jobs" %
                              len(res['FailedJobIDs']))
                allRemove = False
        if not allRemove:
            return S_ERROR("Failed to remove all remnants from WMS")
        gLogger.info("Successfully removed all tasks from the WMS")
        res = self.requestClient.getRequestForJobs(jobIDs)
        if not res['OK']:
            gLogger.error("Failed to get requestID for jobs.", res['Message'])
            return res
        failoverRequests = res['Value']
        gLogger.info("Found %d jobs with associated failover requests" %
                     len(failoverRequests))
        if not failoverRequests:
            return S_OK()
        failed = 0
        for jobID, requestName in failoverRequests.items():
            res = self.requestClient.deleteRequest(requestName)
            if not res['OK']:
                gLogger.error("Failed to remove request from RequestDB",
                              res['Message'])
                failed += 1
            else:
                gLogger.verbose("Removed request %s associated to job %d." %
                                (requestName, jobID))
        if failed:
            gLogger.info("Successfully removed %s requests" %
                         (len(failoverRequests) - failed))
            gLogger.info("Failed to remove %s requests" % failed)
            return S_ERROR("Failed to remove all the request from RequestDB")
        gLogger.info(
            "Successfully removed all the associated failover requests")
        return S_OK()
Exemplo n.º 15
0
class TransformationAgent( AgentModule ):

  def initialize( self ):
    """ standard init
    """
    self.pluginLocation = self.am_getOption( 'PluginLocation',
                                             'DIRAC.TransformationSystem.Agent.TransformationPlugin' )
    self.checkCatalog = self.am_getOption( 'CheckCatalog', 'yes' )
    self.transformationStatus = self.am_getOption( 'transformationStatus', ['Active', 'Completing', 'Flush'] )
    self.maxFiles = self.am_getOption( 'MaxFiles', 5000 )

    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    self.transDB = TransformationClient( 'TransformationDB' )
    self.rm = ReplicaManager()
    self.unusedFiles = {}
    return S_OK()

  def execute( self ):
    """ get and process the transformations to be processed
    """
    res = self.getTransformations()
    if not res['OK']:
      gLogger.info( "execute: Failed to obtain transformations: %s" % res['Message'] )
      return S_OK()
    # Process the transformations
    for transDict in res['Value']:
      transID = long( transDict['TransformationID'] )
      gLogger.info( "execute: Processing transformation %s." % transID )
      startTime = time.time()
      res = self.processTransformation( transDict )
      if not res['OK']:
        gLogger.info( "execute: Failed to process transformation: %s" % res['Message'] )
      else:
        gLogger.info( "execute: Processed transformation in %.1f seconds" % ( time.time() - startTime ) )
    return S_OK()

  def getTransformations( self ):
    """ Obtain the transformations to be executed 
    """
    transName = self.am_getOption( 'Transformation', 'All' )
    if transName == 'All':
      gLogger.info( "getTransformations: Initializing general purpose agent." )
      res = self.transDB.getTransformations( {'Status':self.transformationStatus}, extraParams = True )
      if not res['OK']:
        gLogger.error( "getTransformations: Failed to get transformations: %s" % res['Message'] )
        return res
      transformations = res['Value']
      gLogger.info( "getTransformations: Obtained %d transformations to process" % len( transformations ) )
    else:
      gLogger.info( "getTransformations: Initializing for transformation %s." % transName )
      res = self.transDB.getTransformation( transName, extraParams = True )
      if not res['OK']:
        gLogger.error( "getTransformations: Failed to get transformation: %s." % res['Message'] )
        return res
      transformations = [res['Value']]
    return S_OK( transformations )

  def processTransformation( self, transDict ):
    transID = transDict['TransformationID']
    # First get the LFNs associated to the transformation
    res = self.transDB.getTransformationFiles( condDict = {'TransformationID':transID, 'Status':'Unused'} )
    if not res['OK']:
      gLogger.error( "processTransformation: Failed to obtain input data: %s." % res['Message'] )
      return res
    transFiles = res['Value']
    lfns = res['LFNs']

    if not lfns:
      gLogger.info( "processTransformation: No 'Unused' files found for transformation." )
      if transDict['Status'] == 'Flush':
        res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' )
        if not res['OK']:
          gLogger.error( "processTransformation: Failed to update transformation status to 'Active': %s." % res['Message'] )
        else:
          gLogger.info( "processTransformation: Updated transformation status to 'Active'." )
      return S_OK()
    #Check if something new happened
    if len( lfns ) == self.unusedFiles.get( transID, 0 ) and transDict['Status'] != 'Flush':
      gLogger.info( "processTransformation: No new 'Unused' files found for transformation." )
      return S_OK()

    replicateOrRemove = transDict['Type'].lower() in ["replication", "removal"]
    # Limit the number of LFNs to be considered for replication or removal as they are treated individually
    if replicateOrRemove:
      lfns = lfns[0:self.maxFiles - 1]
    unusedFiles = len( lfns )
    # Check the data is available with replicas
    res = self.__getDataReplicas( transID, lfns, active = not replicateOrRemove )
    if not res['OK']:
      gLogger.error( "processTransformation: Failed to get data replicas: %s" % res['Message'] )
      return res
    dataReplicas = res['Value']

    # Get the plug-in type and create the plug-in object
    plugin = 'Standard'
    if transDict.has_key( 'Plugin' ) and transDict['Plugin']:
      plugin = transDict['Plugin']
    gLogger.info( "processTransformation: Processing transformation with '%s' plug-in." % plugin )
    res = self.__generatePluginObject( plugin )
    if not res['OK']:
      return res
    oPlugin = res['Value']

    # Get the plug-in and set the required params
    oPlugin.setParameters( transDict )
    oPlugin.setInputData( dataReplicas )
    oPlugin.setTransformationFiles( transFiles )
    res = oPlugin.generateTasks()
    if not res['OK']:
      gLogger.error( "processTransformation: Failed to generate tasks for transformation: %s" % res['Message'] )
      return res
    tasks = res['Value']
    # Create the tasks
    allCreated = True
    created = 0
    for se, lfns in tasks:
      res = self.transDB.addTaskForTransformation( transID, lfns, se )
      if not res['OK']:
        gLogger.error( "processTransformation: Failed to add task generated by plug-in: %s." % res['Message'] )
        allCreated = False
      else:
        created += 1
        unusedFiles -= len( lfns )
    if created:
      gLogger.info( "processTransformation: Successfully created %d tasks for transformation." % created )
    self.unusedFiles[transID] = unusedFiles

    # If this production is to Flush
    if transDict['Status'] == 'Flush' and allCreated:
      res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' )
      if not res['OK']:
        gLogger.error( "processTransformation: Failed to update transformation status to 'Active': %s." % res['Message'] )
      else:
        gLogger.info( "processTransformation: Updated transformation status to 'Active'." )
    return S_OK()

  ######################################################################
  #
  # Internal methods used by the agent
  #

  def __generatePluginObject( self, plugin ):
    """ This simply instantiates the TransformationPlugin class with the relevant plugin name
    """
    try:
      plugModule = __import__( self.pluginLocation, globals(), locals(), ['TransformationPlugin'] )
    except ImportError, e:
      gLogger.exception( "__generatePluginObject: Failed to import 'TransformationPlugin' %s: %s" % ( plugin, e ) )
      return S_ERROR()
    try:
      plugin_o = getattr( plugModule, 'TransformationPlugin' )( '%s' % plugin,
                                                                transClient = self.transDB,
                                                                replicaManager = self.rm )
      return S_OK( plugin_o )
    except AttributeError, e:
      gLogger.exception( "__generatePluginObject: Failed to create %s(): %s." % ( plugin, e ) )
      return S_ERROR()
Exemplo n.º 16
0
class TransformationCleaningAgent( AgentModule ):
  """
  .. class:: TransformationCleaningAgent

  :param DataManger dm: DataManager instance
  :param TransfromationClient transClient: TransfromationClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  """

  def __init__( self, *args, **kwargs ):
    """ c'tor
    """
    AgentModule.__init__( self, *args, **kwargs )

    # # data manager
    self.dm = None
    # # transformation client
    self.transClient = None
    # # wms client
    self.wmsClient = None
    # # request client
    self.reqClient = None
    # # file catalog client
    self.metadataClient = None

    # # transformations types
    self.transformationTypes = None
    # # directory locations
    self.directoryLocations = None
    # # transformation metadata
    self.transfidmeta = None
    # # archive periof in days
    self.archiveAfter = None
    # # active SEs
    self.activeStorages = None
    # # transformation log SEs
    self.logSE = None
    # # enable/disable execution
    self.enableFlag = None

  def initialize( self ):
    """ agent initialisation

    reading and setting confing opts

    :param self: self reference
    """
    # # shifter proxy
    self.am_setOption( 'shifterProxy', 'DataManager' )
    # # transformations types
    self.dataProcTTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
    self.dataManipTTypes = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = sorted( agentTSTypes )
    else:
      self.transformationTypes = sorted( self.dataProcTTypes + self.dataManipTTypes )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )
    # # directory locations
    self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB',
                                                                                   'MetadataCatalog' ] ) )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    # # transformation metadata
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    # # archive periof in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )  # days
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )
    # # active SEs
    self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    # # transformation log SEs
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )
    # # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )

    # # data manager
#     self.dm = DataManager()
    # # transformation client
    self.transClient = TransformationClient()
    # # wms client
    self.wmsClient = WMSClient()
    # # request client
    self.reqClient = ReqClient()
    # # file catalog client
    self.metadataClient = FileCatalogClient()

    return S_OK()

  #############################################################################
  def execute( self ):
    """ execution in one agent's cycle

    :param self: self reference
    """

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if not self.enableFlag == 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        # # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # # We just archive
        if transDict[ 'Type' ] in self.dataManipTTypes:
          res = self.archiveTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                         res['Message'] ) )
        else:
          res = self.cleanTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'],
                                                                        res['Message'] ) )


    # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        res = self.removeTransformationOutput( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )

    # # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                                 older = olderThanTime,
                                                 timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        res = self.archiveTransformation( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )
    else:
      self.log.error( "Could not get the transformations" )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ get the directories for the supplied transformation from the transformation system

    :param self: self reference
    :param int transID: transformation ID
    """
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        self.log.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      if type( res['Value'] ) != type( [] ):
        transDirectories = ast.literal_eval( res['Value'] )
      else:
        transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )

    if not directories:
      self.log.info( "No output directories found" )
    directories = sorted( directories )
    return S_OK( directories )
  # FIXME If a classmethod, should it not have cls instead of self?
  @classmethod
  def _addDirs( self, transID, newDirs, existingDirs ):
    """ append uniqe :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    """
    for folder in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, str( folder ) ):
        if not folder in existingDirs:
          existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    """ delete lfn dir from all active SE

    :param self: self reference
    :param sre directory: folder name
    """
    for storageElement in self.activeStorages:
      res = self.__removeStorageDirectory( directory, storageElement )
      if not res['OK']:
        return res
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    """ wipe out all contents from :directory: at :storageElement:

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    """
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )

    se = StorageElement( storageElement )

    res = se.getPfnForLfn( [directory] )
    if not res['OK']:
      self.log.error( "Failed to get PFN for directory", res['Message'] )
      return res
    if directory in res['Value']['Failed']:
      self.log.verbose( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, res['Value']['Failed'][directory] ) )
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = res['Value']['Successful'][directory]

    res = returnSingleResult( se.exists( storageDirectory ) )
    if not res['OK']:
      self.log.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    exists = res['Value']
    if not exists:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()
    res = returnSingleResult( se.removeDirectory( storageDirectory, recursive = True ) )
    if not res['OK']:
      self.log.error( "Failed to remove storage directory", res['Message'] )
      return res
    self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'],
                                                                     directory,
                                                                     storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    """ wipe out everything from catalog under folder :directory:

    :param self: self reference
    :params str directory: folder name
    """
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      self.log.info( "No files are registered in the catalog directory %s" % directory )
      return S_OK()
    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' )
    res = DataManager().removeFile( filesFound, force = True )
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' )

    if not res['OK']:
      return res
    realFailure = False
    for lfn, reason in res['Value']['Failed'].items():
      if "File does not exist" in str( reason ):
        self.log.warn( "File %s not found in some catalog: " % ( lfn ) )
      else:
        self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
        realFailure = True
    if realFailure:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    """ get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    """
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    activeDirs = directories
    allFiles = {}
    fc = FileCatalog()
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = returnSingleResult( fc.listDirectory( currentDir ) )
      activeDirs.remove( currentDir )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        if "No such file or directory" in res['Message']:
          self.log.info( "%s: %s" % ( currentDir, res['Message'] ) )
        else:
          self.log.error( "Failed to get directory %s content: %s" % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    self.log.info( "Found %d files" % len( allFiles ) )
    return S_OK( allFiles.keys() )

  def cleanTransformationLogFiles( self, directory ):
    """ clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    """
    self.log.info( "Removing log files found in the directory %s" % directory )
    res = returnSingleResult( StorageElement( self.logSE ).removeDirectory( directory ) )
    if not res['OK']:
      self.log.error( "Failed to remove log files", res['Message'] )
      return res
    self.log.info( "Successfully removed transformation log directory" )
    return S_OK()

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    """ This just removes any mention of the output data from the catalog and storage """
    self.log.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search( '/LOG/', directory ):
        res = self.cleanCatalogContents( directory )
        if not res['OK']:
          return res
        res = self.cleanStorageContents( directory )
        if not res['OK']:
          return res
    self.log.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully removed output of transformation %d" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    """ This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    """
    self.log.info( "Archiving transformation %s" % transID )
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully archived transformation %d" % transID )
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
    return S_OK()

  def cleanTransformation( self, transID ):
    """ This removes what was produced by the supplied transformation,
        leaving only some info and log in the transformation DB.
    """
    self.log.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      res = self.cleanCatalogContents( directory )
      if not res['OK']:
        return res
      res = self.cleanStorageContents( directory )
      if not res['OK']:
        return res
    # Clean ALL the possible remnants found in the BK
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully cleaned transformation %d" % transID )
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Cleaned' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Cleaned" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Cleaned" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID ):
    """ wipe out files from catalog """
    res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()

    # Executing with shifter proxy
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' )
    res = DataManager().removeFile( fileToRemove, force = True )
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' )

    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    """ clean tasks from WMS, or from the RMS if it is a DataManipulation transformation
    """
    res = self.__getTransformationExternalIDs( transID )
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if externalIDs:
      res = self.transClient.getTransformationParameters( transID, ['Type'] )
      if not res['OK']:
        self.log.error( "Failed to determine transformation type" )
        return res
      transType = res['Value']
      if transType in self.dataProcTTypes:
        res = self.__removeWMSTasks( externalIDs )
      else:
        res = self.__removeRequests( externalIDs )
      if not res['OK']:
        return res
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    """ collect all ExternalIDs for transformation :transID:

    :param self: self reference
    :param int transID: transforamtion ID
    """
    res = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if not res['OK']:
      self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] )
      return res
    externalIDs = [ taskDict['ExternalID'] for taskDict in res["Value"] ]
    self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    """ This will remove requests from the (new) RMS system -

        #FIXME: if the old system is still installed, it won't remove anything!!!
        (we don't want to risk removing from the new RMS what is instead in the old)
    """
    # FIXME: checking if the old system is still installed!
    from DIRAC.ConfigurationSystem.Client import PathFinder
    if PathFinder.getServiceURL( "RequestManagement/RequestManager" ):
      self.log.warn( "NOT removing requests!!" )
      return S_OK()

    rIDs = [ int( long( j ) ) for j in requestIDs if long( j ) ]
    for requestName in rIDs:
      self.reqClient.deleteRequest( requestName )

    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    """ wipe out jobs and their requests from the system

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list trasnJobIDs: job IDs
    """
    # Prevent 0 job IDs
    jobIDs = [ int( j ) for j in transJobIDs if int( j ) ]
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):

      res = self.wmsClient.killJob( jobList )
      if res['OK']:
        self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

      res = self.wmsClient.deleteJob( jobList )
      if res['OK']:
        self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    failed = 0
    # FIXME: double request client: old/new -> only the new will survive sooner or later
    # this is the old
    try:
      res = RequestClient().getRequestForJobs( jobIDs )
      if not res['OK']:
        self.log.error( "Failed to get requestID for jobs.", res['Message'] )
        return res
      failoverRequests = res['Value']
      self.log.info( "Found %d jobs with associated failover requests (in the old RMS)" % len( failoverRequests ) )
      if not failoverRequests:
        return S_OK()
      for jobID, requestName in failoverRequests.items():
        # Put this check just in case, tasks must have associated jobs
        if jobID == 0 or jobID == '0':
          continue
        res = RequestClient().deleteRequest( requestName )
        if not res['OK']:
          self.log.error( "Failed to remove request from RequestDB", res['Message'] )
          failed += 1
        else:
          self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    except RuntimeError:
      failoverRequests = {}
      pass

    # FIXME: and this is the new
    res = self.reqClient.getRequestNamesForJobs( jobIDs )
    if not res['OK']:
      self.log.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests.update( res['Value']['Successful'] )
    if not failoverRequests:
      return S_OK()
    for jobID, requestName in res['Value']['Successful'].items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.reqClient.deleteRequest( requestName )
      if not res['OK']:
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )


    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()
Exemplo n.º 17
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """c'tor"""
        super(Transformation, self).__init__()

        self.paramTypes = {
            "TransformationID": six.integer_types,
            "TransformationName": six.string_types,
            "Status": six.string_types,
            "Description": six.string_types,
            "LongDescription": six.string_types,
            "Type": six.string_types,
            "Plugin": six.string_types,
            "AgentType": six.string_types,
            "FileMask": six.string_types,
            "TransformationGroup": six.string_types,
            "GroupSize": six.integer_types + (float, ),
            "InheritedFrom": six.integer_types,
            "Body": six.string_types,
            "MaxNumberOfTasks": six.integer_types,
            "EventsPerTask": six.integer_types,
        }
        self.paramValues = {
            "TransformationID": 0,
            "TransformationName": "",
            "Status": "New",
            "Description": "",
            "LongDescription": "",
            "Type": "",
            "Plugin": "Standard",
            "AgentType": "Manual",
            "FileMask": "",
            "TransformationGroup": "General",
            "GroupSize": 1,
            "InheritedFrom": 0,
            "Body": "",
            "MaxNumberOfTasks": 0,
            "EventsPerTask": 0,
        }

        # the metaquery parameters are neither part of the transformation parameters nor the additional parameters, so
        # special treatment is necessary
        self.inputMetaQuery = None
        self.outputMetaQuery = None

        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            "Transformations/AllowedPlugins",
            ["Broadcast", "Standard", "BySize", "ByShare"])
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues["TransformationID"] = transID
            res = self.getTransformation()
            if res["OK"]:
                self.exists = True
            elif res["Message"] == "Transformation does not exist":
                raise AttributeError("TransformationID %d does not exist" %
                                     transID)
            else:
                self.paramValues["TransformationID"] = 0
                gLogger.fatal(
                    "Failed to get transformation from database",
                    "%s @ %s" % (transID, self.transClient.serverURL))

    def getServer(self):
        return self.serverURL

    def reset(self, transID=0):
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        return self.__setSE("TargetSE", seList)

    def setSourceSE(self, seList):
        return self.__setSE("SourceSE", seList)

    def setBody(self, body):
        """check that the body is a string, or using the proper syntax for multiple operations,
        or is a BodyPlugin object

        :param body: transformation body, for example

          .. code :: python

            body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                     ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
                   ]

        :type body: string or list of tuples (or lists) of string and dictionaries or a Body plugin (:py:class:`DIRAC.TransformationSystem.Client.BodyPlugin.BaseBody.BaseBody`)
        :raises TypeError: If the structure is not as expected
        :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation`
                            is used
        :returns: S_OK, S_ERROR
        """
        self.item_called = "Body"

        # Simple single operation body case
        if isinstance(body, six.string_types):
            return self.__setParam(body)

        # BodyPlugin case
        elif isinstance(body, BaseBody):
            return self.__setParam(encode(body))

        if not isinstance(body, (list, tuple)):
            raise TypeError("Expected list or string, but %r is %s" %
                            (body, type(body)))

        # MultiOperation body case
        for tup in body:
            if not isinstance(tup, (tuple, list)):
                raise TypeError("Expected tuple or list, but %r is %s" %
                                (tup, type(tup)))
            if len(tup) != 2:
                raise TypeError("Expected 2-tuple, but %r is length %d" %
                                (tup, len(tup)))
            if not isinstance(tup[0], six.string_types):
                raise TypeError(
                    "Expected string, but first entry in tuple %r is %s" %
                    (tup, type(tup[0])))
            if not isinstance(tup[1], dict):
                raise TypeError(
                    "Expected dictionary, but second entry in tuple %r is %s" %
                    (tup, type(tup[0])))
            for par, val in tup[1].items():
                if not isinstance(par, six.string_types):
                    raise TypeError(
                        "Expected string, but key in dictionary %r is %s" %
                        (par, type(par)))
                if par not in Operation.ATTRIBUTE_NAMES:
                    raise ValueError("Unknown attribute for Operation: %s" %
                                     par)
                if not isinstance(
                        val, six.string_types + six.integer_types +
                    (float, list, tuple, dict)):
                    raise TypeError("Cannot encode %r, in json" % (val))
        return self.__setParam(json.dumps(body))

    def setInputMetaQuery(self, query):
        """Set the input meta query.

        :param dict query: dictionary to use for input meta query
        """
        self.inputMetaQuery = query
        return S_OK()

    def setOutputMetaQuery(self, query):
        """Set the output meta query.

        :param dict query: dictionary to use for output meta query
        """
        self.outputMetaQuery = query
        return S_OK()

    def __setSE(self, seParam, seList):
        if isinstance(seList, six.string_types):
            try:
                seList = eval(seList)
            except Exception:
                seList = seList.split(",")
        elif isinstance(seList, (list, dict, tuple)):
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        res = self.__checkSEs(seList)
        if not res["OK"]:
            return res
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        if name.find("get") == 0:
            item = name[3:]
            self.item_called = item
            return self.__getParam
        if name.find("set") == 0:
            item = name[3:]
            self.item_called = item
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        if self.item_called == "Available":
            return S_OK(list(self.paramTypes))
        if self.item_called == "Parameters":
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" %
                             self.item_called)

    def __setParam(self, value):
        change = False
        if self.item_called in self.paramTypes:
            if self.paramValues[self.item_called] != value:
                if isinstance(value, self.paramTypes[self.item_called]):
                    change = True
                else:
                    raise TypeError("%s %s %s expected one of %s" %
                                    (self.item_called, value, type(value),
                                     self.paramTypes[self.item_called]))
        else:
            if self.item_called not in self.paramValues:
                change = True
            else:
                if self.paramValues[self.item_called] != value:
                    change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" %
                            self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues["TransformationID"]
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(
                    transID, self.item_called, value)
                if not res["OK"]:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res["Value"]
        for paramName, paramValue in transParams.items():
            setter = None
            setterName = "set%s" % paramName
            if hasattr(self, setterName) and callable(getattr(
                    self, setterName)):
                setter = getattr(self, setterName)
            if not setterName:
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res["Value"]
        if printOutput:
            self._printFormattedDictList(
                loggingList, ["Message", "MessageDate", "AuthorDN"],
                "MessageDate", "MessageDate")
        return S_OK(loggingList)

    def extendTransformation(self, nTasks, printOutput=False):
        return self.__executeOperation("extendTransformation",
                                       nTasks,
                                       printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        res = self.__executeOperation("cleanTransformation",
                                      printOutput=printOutput)
        if res["OK"]:
            self.paramValues["Status"] = "Cleaned"
        return res

    def deleteTransformation(self, printOutput=False):
        res = self.__executeOperation("deleteTransformation",
                                      printOutput=printOutput)
        if res["OK"]:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        return self.__executeOperation("addFilesToTransformation",
                                       lfns,
                                       printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        return self.__executeOperation("setFileStatusForTransformation",
                                       status,
                                       lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        return self.__executeOperation("getTransformationTaskStats",
                                       printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        return self.__executeOperation("getTransformationStats",
                                       printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        return self.__executeOperation("deleteTasks",
                                       taskMin,
                                       taskMax,
                                       printOutput=printOutput)

    def addTaskForTransformation(self,
                                 lfns=[],
                                 se="Unknown",
                                 printOutput=False):
        return self.__executeOperation("addTaskForTransformation",
                                       lfns,
                                       se,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        return self.__executeOperation("setTaskStatus",
                                       taskID,
                                       status,
                                       printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        printOutput = kwds.pop("printOutput")
        fcn = None
        if hasattr(self.transClient, operation) and callable(
                getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR(
                "Unable to invoke %s, it isn't a member funtion of TransformationClient"
            )
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(
        self,
        fileStatus=[],
        lfns=[],
        outputFields=[
            "FileID",
            "LFN",
            "Status",
            "TaskID",
            "TargetSE",
            "UsedSE",
            "ErrorCount",
            "InsertedTime",
            "LastUpdate",
        ],
        orderBy="FileID",
        printOutput=False,
    ):
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if fileStatus:
            condDict["Status"] = fileStatus
        if lfns:
            condDict["LFN"] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res["ParameterNames"].join(" "))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "FileID", orderBy)
        return res

    def getTransformationTasks(
        self,
        taskStatus=[],
        taskIDs=[],
        outputFields=[
            "TransformationID",
            "TaskID",
            "ExternalStatus",
            "ExternalID",
            "TargetSE",
            "CreationTime",
            "LastUpdateTime",
        ],
        orderBy="TaskID",
        printOutput=False,
    ):
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if taskStatus:
            condDict["ExternalStatus"] = taskStatus
        if taskIDs:
            condDict["TaskID"] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res["ParameterNames"].join(" "))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "TaskID", orderBy)
        return res

    #############################################################################
    def getTransformations(
        self,
        transID=[],
        transStatus=[],
        outputFields=[
            "TransformationID", "Status", "AgentType", "TransformationName",
            "CreationDate"
        ],
        orderBy="TransformationID",
        printOutput=False,
    ):
        condDict = {}
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res["ParameterNames"].join(" "))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "TransformationID", orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """gets the AuthorDN and username of the transformation from the uploaded proxy"""
        username = ""
        author = ""
        res = getProxyInfo()
        if res["OK"]:
            author = res["Value"]["identity"]
            username = res["Value"]["username"]
        else:
            gLogger.error("Unable to get uploaded proxy Info %s " %
                          res["Message"])
            return S_ERROR(res["Message"])

        res = {"username": username, "authorDN": author}
        return S_OK(res)

    #############################################################################
    def getTransformationsByUser(
        self,
        authorDN="",
        userName="",
        transID=[],
        transStatus=[],
        outputFields=[
            "TransformationID", "Status", "AgentType", "TransformationName",
            "CreationDate", "AuthorDN"
        ],
        orderBy="TransformationID",
        printOutput=False,
    ):
        condDict = {}
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res["OK"]:
                gLogger.error(res["Message"])
                return S_ERROR(res["Message"])
            else:
                foundUserName = res["Value"]["username"]
                foundAuthor = res["Value"]["authorDN"]
                # If the username whom created the uploaded proxy is different than the provided username report error and exit
                if not (userName == "" or userName == foundUserName):
                    gLogger.error(
                        "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                        % (userName, foundUserName))
                    return S_ERROR(
                        "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                        % (userName, foundUserName))

                userName = foundUserName
                authorDN = foundAuthor
                gLogger.info(
                    "Will list transformations created by user '%s' with status '%s'"
                    % (userName, ", ".join(transStatus)))
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" %
                (authorDN, ", ".join(transStatus)))

        condDict["AuthorDN"] = authorDN
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res

        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res["ParameterNames"].join(" "))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields,
                                             "TransformationID", orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Show the summary for a list of Transformations

        Fields starting with 'F' ('J')  refers to files (jobs).
        Proc. stand for processed.
        """
        condDict = {"TransformationID": transID}
        orderby = []
        start = 0
        maxitems = len(transID)
        paramShowNames = [
            "TransformationID",
            "Type",
            "Status",
            "Files_Total",
            "Files_PercentProcessed",
            "Files_Processed",
            "Files_Unused",
            "Jobs_TotalCreated",
            "Jobs_Waiting",
            "Jobs_Running",
            "Jobs_Done",
            "Jobs_Failed",
            "Jobs_Stalled",
        ]
        # Below, the header used for each field in the printing: short to fit in one line
        paramShowNamesShort = [
            "TransID",
            "Type",
            "Status",
            "F_Total",
            "F_Proc.(%)",
            "F_Proc.",
            "F_Unused",
            "J_Created",
            "J_Wait",
            "J_Run",
            "J_Done",
            "J_Fail",
            "J_Stalled",
        ]
        dictList = []

        result = self.transClient.getTransformationSummaryWeb(
            condDict, orderby, start, maxitems)
        if not result["OK"]:
            self._prettyPrint(result)
            return result

        if result["Value"]["TotalRecords"] > 0:
            try:
                paramNames = result["Value"]["ParameterNames"]
                for paramValues in result["Value"]["Records"]:
                    paramShowValues = map(
                        lambda pname: paramValues[paramNames.index(pname)],
                        paramShowNames)
                    showDict = dict(zip(paramShowNamesShort, paramShowValues))
                    dictList.append(showDict)

            except Exception as x:
                print("Exception %s " % str(x))

        if not len(dictList) > 0:
            gLogger.error(
                "No found transformations satisfying input condition")
            return S_ERROR(
                "No found transformations satisfying input condition")
        else:
            print(
                self._printFormattedDictList(dictList, paramShowNamesShort,
                                             paramShowNamesShort[0],
                                             paramShowNamesShort[0]))

        return S_OK(dictList)

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        """Add transformation to the transformation system.

        Sets all parameters currently assigned to the transformation.

        :param bool addFiles: if True, immediately perform input data query
        :param bool printOutput: if True, print information about transformation
        """
        res = self._checkCreation()
        if not res["OK"]:
            return self._errorReport(res, "Failed transformation sanity check")
        if printOutput:
            gLogger.info(
                "Will attempt to create transformation with the following parameters"
            )
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues["TransformationName"],
            self.paramValues["Description"],
            self.paramValues["LongDescription"],
            self.paramValues["Type"],
            self.paramValues["Plugin"],
            self.paramValues["AgentType"],
            self.paramValues["FileMask"],
            transformationGroup=self.paramValues["TransformationGroup"],
            groupSize=self.paramValues["GroupSize"],
            inheritedFrom=self.paramValues["InheritedFrom"],
            body=self.paramValues["Body"],
            maxTasks=self.paramValues["MaxNumberOfTasks"],
            eventsPerTask=self.paramValues["EventsPerTask"],
            addFiles=addFiles,
            inputMetaQuery=self.inputMetaQuery,
            outputMetaQuery=self.outputMetaQuery,
        )
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res["Value"]
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            if paramName not in self.paramTypes:
                res = self.transClient.setTransformationParameter(
                    transID, paramName, paramValue)
                if not res["OK"]:
                    gLogger.error("Failed to add parameter",
                                  "%s %s" % (paramName, res["Message"]))
                    gLogger.notice(
                        "To add this parameter later please execute the following."
                    )
                    gLogger.notice("oTransformation = Transformation(%d)" %
                                   transID)
                    gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """Few checks"""
        if self.paramValues["TransformationID"]:
            gLogger.info(
                "You are currently working with an active transformation definition."
            )
            gLogger.info(
                "If you wish to create a new transformation reset the TransformationID."
            )
            gLogger.info("oTransformation.reset()")
            return S_ERROR()

        requiredParameters = [
            "TransformationName", "Description", "LongDescription", "Type"
        ]
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info(
                    "%s is not defined for this transformation. This is required..."
                    % parameter)
                self.paramValues[parameter] = six.moves.input(
                    "Please enter the value of " + parameter + " ")

        plugin = self.paramValues["Plugin"]
        if plugin:
            if plugin not in self.supportedPlugins:
                gLogger.info(
                    "The selected Plugin (%s) is not known to the transformation agent."
                    % plugin)
                res = self.__promptForParameter("Plugin",
                                                choices=self.supportedPlugins,
                                                default="Standard")
                if not res["OK"]:
                    return res
                self.paramValues["Plugin"] = res["Value"]

        plugin = self.paramValues["Plugin"]

        return S_OK()

    def _checkBySizePlugin(self):
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        groupSize = self.paramValues["GroupSize"]
        if groupSize <= 0:
            gLogger.info(
                "The GroupSize was found to be less than zero. It has been set to 1."
            )
            res = self.setGroupSize(1)
            if not res["OK"]:
                return res
        return S_OK()

    def _checkBroadcastPlugin(self):
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s"
            % (", ".join(["SourceSE", "TargetSE"])))
        requiredParams = ["SourceSE", "TargetSE"]
        for requiredParam in requiredParams:
            if not self.paramValues.get(requiredParam):
                paramValue = six.moves.input("Please enter " + requiredParam +
                                             " ")
                setter = None
                setterName = "set%s" % requiredParam
                if hasattr(self, setterName) and callable(
                        getattr(self, setterName)):
                    setter = getattr(self, setterName)
                if not setter:
                    return S_ERROR(
                        "Unable to invoke %s, this function hasn't been implemented."
                        % setterName)
                ses = paramValue.replace(",", " ").split()
                res = setter(ses)
                if not res["OK"]:
                    return res
        return S_OK()

    def __checkSEs(self, seList):
        res = gConfig.getSections("/Resources/StorageElements")
        if not res["OK"]:
            return self._errorReport(res,
                                     "Failed to get possible StorageElements")
        missing = set(seList) - set(res["Value"])
        if missing:
            for se in missing:
                gLogger.error("StorageElement %s is not known" % se)
            return S_ERROR("%d StorageElements not known" % len(missing))
        return S_OK()

    def __promptForParameter(self,
                             parameter,
                             choices=[],
                             default="",
                             insert=True):
        res = promptUser("Please enter %s" % parameter,
                         choices=choices,
                         default=default)
        if not res["OK"]:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res["Value"]))
        paramValue = res["Value"]
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(
                    self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!"
                )
            res = setter(paramValue)
            if not res["OK"]:
                return res
        return S_OK(paramValue)
class TransformationCleaningAgent( AgentModule ):

  #############################################################################
  def initialize( self ):
    """Sets defaults """
    self.replicaManager = ReplicaManager()
    self.transClient = TransformationClient()
    self.wmsClient = WMSClient()
    self.requestClient = RequestClient()
    self.metadataClient = FileCatalogClient()
    self.storageUsageClient = StorageUsageClient()

    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    self.transformationTypes = sortList( self.am_getOption( 'TransformationTypes', ['MCSimulation', 'DataReconstruction', 'DataStripping', 'MCStripping', 'Merge', 'Replication'] ) )
    gLogger.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )
    self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', ['TransformationDB', 'StorageUsage', 'MetadataCatalog'] ) )
    gLogger.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    gLogger.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 ) # days
    gLogger.info( "Will archive Completed transformations after %d days" % self.archiveAfter )
    self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) )
    gLogger.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    gLogger.info( "Will remove logs found on storage element: %s" % self.logSE )
    return S_OK()

  #############################################################################
  def execute( self ):
    """ The TransformationCleaningAgent execution method.
    """
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if not self.enableFlag == 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option %s/EnableFlag' % ( self.section ) )
      return S_OK( 'Disabled via CS flag' )

    # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( {'Status':'Cleaning', 'Type':self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        self.cleanTransformation( transDict['TransformationID'] )

    # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( {'Status':'RemovingFiles', 'Type':self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        self.removeTransformationOutput( transDict['TransformationID'] )

    # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( {'Status':'Completed', 'Type':self.transformationTypes}, older = olderThanTime )
    if res['OK']:
      for transDict in res['Value']:
        self.archiveTransformation( transDict['TransformationID'] )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ Get the directories for the supplied transformation from the transformation system """
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        gLogger.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      transDirectories = res['Value'].splitlines()
      directories = self.__addDirs( transID, transDirectories, directories )

    if 'StorageUsage' in self.directoryLocations:
      res = self.storageUsageClient.getStorageDirectories( '', '', transID, [] )
      if not res['OK']:
        gLogger.error( "Failed to obtain storage usage directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self.__addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        gLogger.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self.__addDirs( transID, transDirectories, directories )
    if not directories:
      gLogger.info( "No output directories found" )
    directories = sortList( directories )
    return S_OK( directories )

  def __addDirs( self, transID, newDirs, existingDirs ):
    for dir in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, dir ):
        if not dir in existingDirs:
          existingDirs.append( dir )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    for storageElement in self.activeStorages:
      res = self.__removeStorageDirectory( directory, storageElement )
      if not res['OK']:
        return res
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    gLogger.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )
    res = self.replicaManager.getPfnForLfn( [directory], storageElement )
    if not res['OK']:
      gLogger.error( "Failed to get PFN for directory", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = res['Value']['Successful'].values()[0]
    res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    exists = res['Value']
    if not exists:
      gLogger.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()
    res = self.replicaManager.removeStorageDirectory( storageDirectory, storageElement, recursive = True, singleDirectory = True )
    if not res['OK']:
      gLogger.error( "Failed to remove storage directory", res['Message'] )
      return res
    gLogger.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'], directory, storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      return S_OK()
    gLogger.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )
    res = self.replicaManager.removeFile( filesFound )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      gLogger.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    gLogger.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      gLogger.info( directory )
    activeDirs = directories
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True )
      activeDirs.remove( currentDir )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        gLogger.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    gLogger.info( "Found %d files" % len( allFiles ) )
    return S_OK( allFiles.keys() )

  def cleanTransformationLogFiles( self, directory ):
    gLogger.info( "Removing log files found in the directory %s" % directory )
    res = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True )
    if not res['OK']:
      gLogger.error( "Failed to remove log files", res['Message'] )
      return res
    gLogger.info( "Successfully removed transformation log directory" )
    return S_OK()

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    """ This just removes any mention of the output data from the catalog and storage """
    gLogger.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      gLogger.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search( '/LOG/', directory ):
        res = self.cleanCatalogContents( directory )
        if not res['OK']:
          return res
        res = self.cleanStorageContents( directory )
        if not res['OK']:
          return res
    gLogger.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID, directories )
    if not res['OK']:
      return res
    gLogger.info( "Successfully removed output of transformation %d" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      gLogger.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    gLogger.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    """ This just removes job from the jobDB and the transformation DB """
    gLogger.info( "Archiving transformation %s" % transID )
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    gLogger.info( "Successfully archived transformation %d" % transID )
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if not res['OK']:
      gLogger.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] )
      return res
    gLogger.info( "Updated status of transformation %s to Archived" % ( transID ) )
    return S_OK()

  def cleanTransformation( self, transID ):
    """ This removes any mention of the supplied transformation 
    """
    gLogger.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      gLogger.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      res = self.cleanCatalogContents( directory )
      if not res['OK']:
        return res
      res = self.cleanStorageContents( directory )
      if not res['OK']:
        return res
    # Clean ALL the possible remnants found in the BK
    res = self.cleanMetadataCatalogFiles( transID, directories )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    gLogger.info( "Successfully cleaned transformation %d" % transID )
    # Change the status of the transformation to deleted
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' )
    if not res['OK']:
      gLogger.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] )
      return res
    gLogger.info( "Updated status of transformation %s to Deleted" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID, directories ):
    res = self.metadataClient.findFilesByMetadata( {self.transfidmeta:transID} )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not len(fileToRemove):
      gLogger.info('No files found for transID %s'%transID)
      return S_OK()
    res = self.replicaManager.removeFile( fileToRemove )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      gLogger.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    gLogger.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    res = self.__getTransformationExternalIDs( transID )
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if externalIDs:
      res = self.transClient.getTransformationParameters( transID, ['Type'] )
      if not res['OK']:
        gLogger.error( "Failed to determine transformation type" )
        return res
      transType = res['Value']
      if transType == 'Replication':
        res = self.__removeRequests( externalIDs )
      else:
        res = self.__removeWMSTasks( externalIDs )
      if not res['OK']:
        return res
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    res = self.transClient.getTransformationTasks( condDict = {'TransformationID':transID} )
    if not res['OK']:
      gLogger.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] )
      return res
    externalIDs = []
    for taskDict in res['Value']:
      externalIDs.append( taskDict['ExternalID'] )
    gLogger.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    gLogger.error( "Not removing requests but should do" )
    return S_OK()

  def __removeWMSTasks( self, jobIDs ):
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):
      res = self.wmsClient.deleteJob( jobList )
      if res['OK']:
        gLogger.info( "Successfully removed %d jobs from WMS" % len( jobList ) )
      elif ( res.has_key( 'InvalidJobIDs' ) ) and ( not res.has_key( 'NonauthorizedJobIDs' ) ) and ( not res.has_key( 'FailedJobIDs' ) ):
        gLogger.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif res.has_key( 'NonauthorizedJobIDs' ):
        gLogger.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif res.has_key( 'FailedJobIDs' ):
        gLogger.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False
    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    gLogger.info( "Successfully removed all tasks from the WMS" )
    res = self.requestClient.getRequestForJobs( jobIDs )
    if not res['OK']:
      gLogger.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests = res['Value']
    gLogger.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) )
    if not failoverRequests:
      return S_OK()
    failed = 0
    for jobID, requestName in failoverRequests.items():
      res = self.requestClient.deleteRequest( requestName )
      if not res['OK']:
        gLogger.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        gLogger.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    if failed:
      gLogger.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      gLogger.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    gLogger.info( "Successfully removed all the associated failover requests" )
    return S_OK()
    selectDict = {'TransformationID': res['Value']['TransformationID']}
    if status:
        selectDict['Status'] = status
    res = tc.getTransformationFiles(condDict=selectDict)
    if not res['OK']:
        gLogger.error('Failed to get transformation files: %s' % res['Message'])
        continue
    if not res['Value']:
        gLogger.debug('No file found for transformation %s' % t)
        continue

    lfns = [f['LFN'] for f in res['Value']]

    gLogger.notice('Reset files for status: %s' % status)
    res = tc.setFileStatusForTransformation(t, 'Unused', lfns)
    if not res['OK']:
        gLogger.error('Failed to reset file status: %s' % res['Message'])
        continue
    if 'Failed' in res['Value']:
        gLogger.warn('Could not reset some files: ')
        for lfn, reason in res['Value']['Failed'].items():
          gLogger.warn('%s: %s' % (lfn, reason))

    gLogger.notice('Updated file statuses to "Unused" for %d file(s)' % len(lfns))

    result = tc.setTransformationParameter(t, 'Status', 'Flush')
    if not result['OK']:
        gLogger.error('Can not flush transformation: %s' % result['Message'])
        continue
Exemplo n.º 20
0
class TransformationCleaningAgent( AgentModule ):
  '''
  .. class:: TransformationCleaningAgent

  :param ReplicaManger replicaManager: ReplicaManager instance
  :param TransfromationClient transClient: TransfromationClient instance
  :param RequestClient requestClient: RequestClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  '''

  def __init__( self, *args, **kwargs ):
    ''' c'tor
    '''
    AgentModule.__init__( self, *args, **kwargs )
    # # replica manager
    self.replicaManager = ReplicaManager()
    # # transformation client
    self.transClient = TransformationClient()
    # # wms client
    self.wmsClient = WMSClient()
    # # request client
    self.requestClient = RequestClient()
    # # file catalog clinet
    self.metadataClient = FileCatalogClient()

    # # placeholders for CS options

    # # transformations types
    self.transformationTypes = None
    # # directory locations
    self.directoryLocations = None
    # # transformation metadata
    self.transfidmeta = None
    # # archive periof in days
    self.archiveAfter = None
    # # active SEs
    self.activeStorages = None
    # # transformation log SEs
    self.logSE = None
    # # enable/disable execution
    self.enableFlag = None

  def initialize( self ):
    ''' agent initialisation

    reading and setting confing opts

    :param self: self reference
    '''
    # # shifter proxy
    self.am_setOption( 'shifterProxy', 'DataManager' )
    # # transformations types
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = sortList( agentTSTypes )
    else:
      dataProc = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
      dataManip = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
      self.transformationTypes = sortList( dataProc + dataManip )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )
    # # directory locations
    self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB',
                                                                                   'MetadataCatalog' ] ) )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    # # transformation metadata
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    # # archive periof in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )  # days
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )
    # # active SEs
    self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    # # transformation log SEs
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )
    # # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    return S_OK()

  #############################################################################
  def execute( self ):
    ''' execution in one agent's cycle

    :param self: self reference
    '''

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if not self.enableFlag == 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        # # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # # We just archive
        if transDict[ 'Type' ] in [ 'Replication', 'Removal' ]:
          res = self.archiveTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                         res['Message'] ) )
        else:
          res = self.cleanTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'],
                                                                        res['Message'] ) )


    # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        res = self.removeTransformationOutput( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )

    # # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                                 older = olderThanTime,
                                                 timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        res = self.archiveTransformation( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )
    else:
      self.log.error( "Could not get the transformations" )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    ''' get the directories for the supplied transformation from the transformation system

    :param self: self reference
    :param int transID: transformation ID
    '''
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        self.log.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      transDirectories = res['Value'].splitlines()
      directories = self._addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )

    if not directories:
      self.log.info( "No output directories found" )
    directories = sortList( directories )
    return S_OK( directories )

  @classmethod
  def _addDirs( self, transID, newDirs, existingDirs ):
    ''' append uniqe :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    '''
    for folder in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, str( folder ) ):
        if not folder in existingDirs:
          existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    ''' delete lfn dir from all active SE

    :param self: self reference
    :param sre directory: folder name
    '''
    for storageElement in self.activeStorages:
      res = self.__removeStorageDirectory( directory, storageElement )
      if not res['OK']:
        return res
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    ''' wipe out all contents from :directory: at :storageElement:

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    '''
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )
    res = self.replicaManager.getPfnForLfn( [directory], storageElement )
    if not res['OK']:
      self.log.error( "Failed to get PFN for directory", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      self.log.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = res['Value']['Successful'].values()[0]
    res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True )
    if not res['OK']:
      self.log.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    exists = res['Value']
    if not exists:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()
    res = self.replicaManager.removeStorageDirectory( storageDirectory,
                                                      storageElement,
                                                      recursive = True,
                                                      singleDirectory = True )
    if not res['OK']:
      self.log.error( "Failed to remove storage directory", res['Message'] )
      return res
    self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'],
                                                                     directory,
                                                                     storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    ''' wipe out everything from catalog under folder :directory:

    :param self: self reference
    :params str directory: folder name
    '''
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      return S_OK()
    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )
    res = self.replicaManager.removeFile( filesFound, force = True )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    ''' get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    '''
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    activeDirs = directories
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True )
      activeDirs.remove( currentDir )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        self.log.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    self.log.info( "Found %d files" % len( allFiles ) )
    return S_OK( allFiles.keys() )

  def cleanTransformationLogFiles( self, directory ):
    ''' clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    '''
    self.log.info( "Removing log files found in the directory %s" % directory )
    res = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True )
    if not res['OK']:
      self.log.error( "Failed to remove log files", res['Message'] )
      return res
    self.log.info( "Successfully removed transformation log directory" )
    return S_OK()

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    ''' This just removes any mention of the output data from the catalog and storage '''
    self.log.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search( '/LOG/', directory ):
        res = self.cleanCatalogContents( directory )
        if not res['OK']:
          return res
        res = self.cleanStorageContents( directory )
        if not res['OK']:
          return res
    self.log.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully removed output of transformation %d" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    ''' This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    '''
    self.log.info( "Archiving transformation %s" % transID )
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully archived transformation %d" % transID )
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
    return S_OK()

  def cleanTransformation( self, transID ):
    ''' This removes any mention of the supplied transformation
    '''
    self.log.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      res = self.cleanCatalogContents( directory )
      if not res['OK']:
        return res
      res = self.cleanStorageContents( directory )
      if not res['OK']:
        return res
    # Clean ALL the possible remnants found in the BK
    res = self.cleanMetadataCatalogFiles( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully cleaned transformation %d" % transID )
    # Change the status of the transformation to deleted
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Deleted" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID ):
    ''' wipe out files from catalog '''
    res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()
    res = self.replicaManager.removeFile( fileToRemove, force = True )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    ''' clean tasks from WMS
    '''
    res = self.__getTransformationExternalIDs( transID )
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if externalIDs:
      res = self.transClient.getTransformationParameters( transID, ['Type'] )
      if not res['OK']:
        self.log.error( "Failed to determine transformation type" )
        return res
      transType = res['Value']
      if transType in [ 'Replication', 'Removal' ]:
        res = self.__removeRequests( externalIDs )
      else:
        res = self.__removeWMSTasks( externalIDs )
      if not res['OK']:
        return res
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    ''' collect all ExternalIDs for transformation :transID:

    :param self: self reference
    :param int transID: transforamtion ID
    '''
    res = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if not res['OK']:
      self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] )
      return res
    externalIDs = [ taskDict['ExternalID'] for taskDict in res["Value"] ]
    self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    ''' dummy method '''
    self.log.error( "Not removing requests but should do" )
    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    ''' wipe out jobs and their requests from the system

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list trasnJobIDs: job IDs
    '''
    # Prevent 0 job IDs
    jobIDs = [ int( j ) for j in transJobIDs if int( j ) ]
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):

      res = self.wmsClient.killJob( jobList )
      if res['OK']:
        self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

      res = self.wmsClient.deleteJob( jobList )
      if res['OK']:
        self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    res = self.requestClient.getRequestForJobs( jobIDs )
    if not res['OK']:
      self.log.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests = res['Value']
    self.log.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) )
    if not failoverRequests:
      return S_OK()
    failed = 0
    for jobID, requestName in failoverRequests.items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.requestClient.deleteRequest( requestName )
      if not res['OK']:
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()
Exemplo n.º 21
0
class TransformationCLI(cmd.Cmd, API):
    def __init__(self):
        self.server = TransformationClient()
        self.indentSpace = 4
        cmd.Cmd.__init__(self)

    def printPair(self, key, value, separator=":"):
        valueList = value.split("\n")
        print "%s%s%s %s" % (key, " " * (self.indentSpace - len(key)),
                             separator, valueList[0].strip())
        for valueLine in valueList[1:-1]:
            print "%s  %s" % (" " * self.indentSpace, valueLine.strip())

    def do_exit(self, args):
        """ Exits the shell.
        usage: exit
    """
        sys.exit(0)

    def do_quit(self, *args):
        """ Exits the shell.
        Usage: quit
    """
        sys.exit(0)

    def do_help(self, args):
        """ Default version of the help command
       Usage: help <command>
       OR use helpall to see description for all commans"""
        cmd.Cmd.do_help(self, args)

    # overriting default help command
    def do_helpall(self, args):
        """
    Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
    """
        if len(args) == 0:
            print "\nAvailable commands:\n"
            attrList = dir(self)
            attrList.sort()
            for attribute in attrList:
                if attribute.find("do_") == 0:
                    self.printPair(attribute[3:],
                                   getattr(self, attribute).__doc__[1:])
                    print ""
        else:
            command = args.split()[0].strip()
            try:
                obj = getattr(self, "do_%s" % command)
            except:
                print "There's no such %s command" % command
                return
            self.printPair(command, obj.__doc__[1:])

    def do_shell(self, args):
        """Execute a shell command

       usage !<shell_command>
    """
        comm = args
        res = shellCall(0, comm)
        if res['OK'] and res['Value'][0] == 0:
            returnCode, stdOut, stdErr = res['Value']
            print "%s\n%s" % (stdOut, stdErr)
        else:
            print res['Message']

    def check_params(self, args, num):
        """Checks if the number of parameters correct"""
        argss = string.split(args)
        length = len(argss)
        if length < num:
            print "Error: Number of arguments provided %d less that required %d, please correct." % (
                length, num)
            return (False, length)
        return (argss, length)

    def check_id_or_name(self, id_or_name):
        """resolve name or Id by converting type of argument """
        if id_or_name.isdigit():
            return long(id_or_name)  # its look like id
        return id_or_name

    def do_setServer(self, args):
        """ Set the destination server

        usage: setServer serverURL
    """
        argss = string.split(args)
        if len(argss) == 0:
            print "no server provided"
        self.serverURL = argss[0]
        self.server.setServer(self.serverURL)

    ####################################################################
    #
    # These are the methods for transformation manipulation
    #

    def do_getall(self, args):
        """Get transformation details

       usage: getall [Status] [Status]
    """
        oTrans = Transformation()
        oTrans.setServer(self.serverURL)
        oTrans.getTransformations(transStatus=string.split(args),
                                  printOutput=True)

    def do_getStatus(self, args):
        """Get transformation details

       usage: getStatus <transName|ID>
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        for transName in argss:
            res = self.server.getTransformation(transName)
            if not res['OK']:
                print "Getting status of %s failed: %s" % (transName,
                                                           res['Message'])
            else:
                print "%s: %s" % (transName, res['Value']['Status'])

    def do_setStatus(self, args):
        """Set transformation status

       usage: setStatus  <Status> <transName|ID>
       Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
    """
        argss = string.split(args)
        if not len(argss) > 1:
            print "transformation and status not supplied"
            return
        status = argss[0]
        transNames = argss[1:]
        for transName in transNames:
            res = self.server.setTransformationParameter(
                transName, 'Status', status)
            if not res['OK']:
                print "Setting status of %s failed: %s" % (transName,
                                                           res['Message'])
            else:
                print "%s set to %s" % (transName, status)

    def do_start(self, args):
        """Start transformation

       usage: start <transName|ID>
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        for transName in argss:
            res = self.server.setTransformationParameter(
                transName, 'Status', 'Active')
            if not res['OK']:
                print "Setting Status of %s failed: %s" % (transName,
                                                           res['Message'])
            else:
                res = self.server.setTransformationParameter(
                    transName, 'AgentType', 'Automatic')
                if not res['OK']:
                    print "Setting AgentType of %s failed: %s" % (
                        transName, res['Message'])
                else:
                    print "%s started" % transName

    def do_stop(self, args):
        """Stop transformation

       usage: stop <transID|ID>
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        for transName in argss:
            res = self.server.setTransformationParameter(
                transName, 'AgentType', 'Manual')
            if not res['OK']:
                print "Stopping of %s failed: %s" % (transName, res['Message'])
            else:
                print "%s stopped" % transName

    def do_flush(self, args):
        """Flush transformation

       usage: flush <transName|ID>
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        for transName in argss:
            res = self.server.setTransformationParameter(
                transName, 'Status', 'Flush')
            if not res['OK']:
                print "Flushing of %s failed: %s" % (transName, res['Message'])
            else:
                print "%s flushing" % transName

    def do_get(self, args):
        """Get transformation definition

    usage: get <transName|ID>
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print "Failed to get %s: %s" % (transName, res['Message'])
        else:
            res['Value'].pop('Body')
            printDict(res['Value'])

    def do_getBody(self, args):
        """Get transformation body

    usage: getBody <transName|ID>
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        transName = argss[0]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print "Failed to get %s: %s" % (transName, res['Message'])
        else:
            print res['Value']['Body']

    def do_getFileStat(self, args):
        """Get transformation file statistics

     usage: getFileStat <transName|ID>
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        transName = argss[0]
        res = self.server.getTransformationStats(transName)
        if not res['OK']:
            print "Failed to get statistics for %s: %s" % (transName,
                                                           res['Message'])
        else:
            res['Value'].pop('Total')
            printDict(res['Value'])

    def do_modMask(self, args):
        """Modify transformation input definition

       usage: modInput <mask> <transName|ID>
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        mask = argss[0]
        transNames = argss[1:]
        for transName in transNames:
            res = self.server.setTransformationParameter(
                transName, "FileMask", mask)
            if not res['OK']:
                print "Failed to modify input file mask for %s: %s" % (
                    transName, res['Message'])
            else:
                print "Updated %s filemask" % transName

    def do_getFiles(self, args):
        """Get files for the transformation (optionally with a given status)

    usage: getFiles <transName|ID> [Status] [Status]
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no transformation supplied"
            return
        transName = argss[0]
        status = argss[1:]
        res = self.server.getTransformation(transName)
        if not res['OK']:
            print "Failed to get transformation information: %s" % res[
                'Message']
        else:
            selectDict = {'TransformationID': res['Value']['TransformationID']}
            if status:
                selectDict['Status'] = status
            res = self.server.getTransformationFiles(condDict=selectDict)
            if not res['OK']:
                print "Failed to get transformation files: %s" % res['Message']
            elif res['Value']:
                self._printFormattedDictList(
                    res['Value'],
                    ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                    'LFN', 'LFN')
            else:
                print "No files found"

    def do_getFileStatus(self, args):
        """Get file(s) status for the given transformation

    usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
    """
        argss = string.split(args)
        if len(argss) < 2:
            print "transformation and file not supplied"
            return
        transName = argss[0]
        lfns = argss[1:]

        res = self.server.getTransformation(transName)
        if not res['OK']:
            print "Failed to get transformation information: %s" % res[
                'Message']
        else:
            selectDict = {'TransformationID': res['Value']['TransformationID']}
            res = self.server.getTransformationFiles(condDict=selectDict)
            if not res['OK']:
                print "Failed to get transformation files: %s" % res['Message']
            elif res['Value']:
                filesList = []
                for fileDict in res['Value']:
                    if fileDict['LFN'] in lfns:
                        filesList.append(fileDict)
                if filesList:
                    self._printFormattedDictList(filesList, [
                        'LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'
                    ], 'LFN', 'LFN')
                else:
                    print "Could not find any LFN in", lfns, "for transformation", transName
            else:
                print "No files found"

    def do_setFileStatus(self, args):
        """Set file status for the given transformation

    usage: setFileStatus <transName|ID> <lfn> <status>
    """
        argss = string.split(args)
        if not len(argss) == 3:
            print "transformation file and status not supplied"
            return
        transName = argss[0]
        lfn = argss[1]
        status = argss[2]
        res = self.server.setFileStatusForTransformation(
            transName, status, [lfn])
        if not res['OK']:
            print "Failed to update file status: %s" % res['Message']
        else:
            print "Updated file status to %s" % status

    def do_resetFile(self, args):
        """Reset file status for the given transformation

    usage: setFileStatus <transName|ID> <lfn>
    """
        argss = string.split(args)
        if not len(argss) > 1:
            print "transformation and file(s) not supplied"
            return
        transName = argss[0]
        lfns = argss[1:]
        res = self.server.setFileStatusForTransformation(
            transName, 'Unused', lfns)
        if not res['OK']:
            print "Failed to reset file status: %s" % res['Message']
        else:
            print "Updated file statuses to 'Unused' for %d file(s)" % len(
                lfns)

    ####################################################################
    #
    # These are the methods for file manipulation
    #

    def do_addDirectory(self, args):
        """Add files from the given catalog directory

    usage: addDirectory <directory> [directory]
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no directory supplied"
            return
        for directory in argss:
            res = self.server.addDirectory(directory, force=True)
            if not res['OK']:
                print 'failed to add directory %s: %s' % (directory,
                                                          res['Message'])
            else:
                print 'added %s files for %s' % (res['Value'], directory)

    def do_replicas(self, args):
        """ Get replicas for <path>

        usage: replicas <lfn> [lfn]
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no files supplied"
            return
        res = self.server.getReplicas(argss)
        if not res['OK']:
            print "failed to get any replica information: %s" % res['Message']
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to get replica information for %s: %s" % (lfn, error)
        for lfn in sortList(res['Value']['Successful'].keys()):
            ses = sortList(res['Value']['Successful'][lfn].keys())
            outStr = "%s :" % lfn.ljust(100)
            for se in ses:
                outStr = "%s %s" % (outStr, se.ljust(15))
            print outStr

    def do_addFile(self, args):
        """Add new files to transformation DB

    usage: addFile <lfn> [lfn]
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no files supplied"
            return
        lfnDict = {}
        for lfn in argss:
            lfnDict[lfn] = {
                'PFN': 'IGNORED-PFN',
                'SE': 'IGNORED-SE',
                'Size': 0,
                'GUID': 'IGNORED-GUID',
                'Checksum': 'IGNORED-CHECKSUM'
            }
        res = self.server.addFile(lfnDict, force=True)
        if not res['OK']:
            print "failed to add any files: %s" % res['Message']
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to add %s: %s" % (lfn, error)
        for lfn in sortList(res['Value']['Successful'].keys()):
            print "added %s" % lfn

    def do_removeFile(self, args):
        """Remove file from transformation DB

    usage: removeFile <lfn> [lfn]
    """
        argss = string.split(args)
        if not len(argss) > 0:
            print "no files supplied"
            return
        res = self.server.removeFile(argss)
        if not res['OK']:
            print "failed to remove any files: %s" % res['Message']
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to remove %s: %s" % (lfn, error)
        for lfn in sortList(res['Value']['Successful'].keys()):
            print "removed %s" % lfn

    def do_addReplica(self, args):
        """ Add new replica to the transformation DB

    usage: addReplica <lfn> <se>
    """
        argss = string.split(args)
        if not len(argss) == 2:
            print "no file info supplied"
            return
        lfn = argss[0]
        se = argss[1]
        lfnDict = {}
        lfnDict[lfn] = {
            'PFN': 'IGNORED-PFN',
            'SE': se,
            'Size': 0,
            'GUID': 'IGNORED-GUID',
            'Checksum': 'IGNORED-CHECKSUM'
        }
        res = self.server.addReplica(lfnDict, force=True)
        if not res['OK']:
            print "failed to add replica: %s" % res['Message']
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to add replica: %s" % (error)
        for lfn in sortList(res['Value']['Successful'].keys()):
            print "added %s" % lfn

    def do_removeReplica(self, args):
        """Remove replica from the transformation DB

    usage: removeReplica <lfn> <se>
    """
        argss = string.split(args)
        if not len(argss) == 2:
            print "no file info supplied"
            return
        lfn = argss[0]
        se = argss[1]
        lfnDict = {}
        lfnDict[lfn] = {
            'PFN': 'IGNORED-PFN',
            'SE': se,
            'Size': 0,
            'GUID': 'IGNORED-GUID',
            'Checksum': 'IGNORED-CHECKSUM'
        }
        res = self.server.removeReplica(lfnDict)
        if not res['OK']:
            print "failed to remove replica: %s" % res['Message']
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to remove replica: %s" % (error)
        for lfn in sortList(res['Value']['Successful'].keys()):
            print "removed %s" % lfn

    def do_setReplicaStatus(self, args):
        """Set replica status, usually used to mark a replica Problematic

    usage: setReplicaStatus <lfn> <status> <se>
    """
        argss = string.split(args)
        if not len(argss) > 2:
            print "no file info supplied"
            return
        lfn = argss[0]
        status = argss[1]
        se = argss[2]
        lfnDict = {}
        lfnDict[lfn] = {
            'Status': status,
            'PFN': 'IGNORED-PFN',
            'SE': se,
            'Size': 0,
            'GUID': 'IGNORED-GUID',
            'Checksum': 'IGNORED-CHECKSUM'
        }
        res = self.server.setReplicaStatus(lfnDict)
        if not res['OK']:
            print "failed to set replica status: %s" % res['Message']
            return
        for lfn in sortList(res['Value']['Failed'].keys()):
            error = res['Value']['Failed'][lfn]
            print "failed to set replica status: %s" % (error)
        for lfn in sortList(res['Value']['Successful'].keys()):
            print "updated replica status %s" % lfn
Exemplo n.º 22
0
class TransformationCleaningAgent( AgentModule ):
  '''
  .. class:: TransformationCleaningAgent

  :param ReplicaManger replicaManager: ReplicaManager instance
  :param TransfromationClient transClient: TransfromationClient instance
  :param RequestClient requestClient: RequestClient instance
  :param FileCatalogClient metadataClient: FileCatalogClient instance

  '''

  def __init__( self, *args, **kwargs ):
    ''' c'tor
    '''
    AgentModule.__init__( self, *args, **kwargs )
    # # replica manager
    self.replicaManager = ReplicaManager()
    # # transformation client
    self.transClient = TransformationClient()
    # # wms client
    self.wmsClient = WMSClient()
    # # request client
    self.requestClient = RequestClient()
    # # file catalog clinet
    self.metadataClient = FileCatalogClient()

    # # placeholders for CS options

    # # transformations types
    self.transformationTypes = None
    # # directory locations
    self.directoryLocations = None
    # # transformation metadata
    self.transfidmeta = None
    # # archive periof in days
    self.archiveAfter = None
    # # active SEs
    self.activeStorages = None
    # # transformation log SEs
    self.logSE = None
    # # enable/disable execution
    self.enableFlag = None

  def initialize( self ):
    ''' agent initialisation

    reading and setting confing opts

    :param self: self reference
    '''
    # # shifter proxy
    self.am_setOption( 'shifterProxy', 'DataManager' )
    # # transformations types
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = sortList( agentTSTypes )
    else:
      dataProc = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
      dataManip = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
      self.transformationTypes = sortList( dataProc + dataManip )
    self.log.info( "Will consider the following transformation types: %s" % str( self.transformationTypes ) )
    # # directory locations
    self.directoryLocations = sortList( self.am_getOption( 'DirectoryLocations', [ 'TransformationDB',
                                                                                   'MetadataCatalog' ] ) )
    self.log.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    # # transformation metadata
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.log.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    # # archive periof in days
    self.archiveAfter = self.am_getOption( 'ArchiveAfter', 7 )  # days
    self.log.info( "Will archive Completed transformations after %d days" % self.archiveAfter )
    # # active SEs
    self.activeStorages = sortList( self.am_getOption( 'ActiveSEs', [] ) )
    self.log.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    # # transformation log SEs
    self.logSE = self.am_getOption( 'TransformationLogSE', 'LogSE' )
    self.log.info( "Will remove logs found on storage element: %s" % self.logSE )
    # # enable/disable execution, should be using CS option Status?? with default value as 'Active'??
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    return S_OK()

  #############################################################################
  def execute( self ):
    ''' execution in one agent's cycle

    :param self: self reference
    '''

    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if not self.enableFlag == 'True':
      self.log.info( 'TransformationCleaningAgent is disabled by configuration option EnableFlag' )
      return S_OK( 'Disabled via CS flag' )

    # # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations( { 'Status' : 'Cleaning',
                                                 'Type' : self.transformationTypes } )
    if res['OK']:
      for transDict in res['Value']:
        # # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # # We just archive
        if transDict[ 'Type' ] in [ 'Replication', 'Removal' ]:
          res = self.archiveTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                         res['Message'] ) )
        else:
          res = self.cleanTransformation( transDict['TransformationID'] )
          if not res['OK']:
            self.log.error( "Problems cleaning transformation %s: %s" % ( transDict['TransformationID'],
                                                                        res['Message'] ) )


    # # Obtain the transformations in RemovingFiles status and (wait for it) removes the output files
    res = self.transClient.getTransformations( { 'Status' : 'RemovingFiles',
                                                 'Type' : self.transformationTypes} )
    if res['OK']:
      for transDict in res['Value']:
        res = self.removeTransformationOutput( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems removing transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )

    # # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta( days = self.archiveAfter )
    res = self.transClient.getTransformations( { 'Status' : 'Completed',
                                                 'Type' : self.transformationTypes },
                                                 older = olderThanTime,
                                                 timeStamp = 'LastUpdate' )
    if res['OK']:
      for transDict in res['Value']:
        res = self.archiveTransformation( transDict['TransformationID'] )
        if not res['OK']:
          self.log.error( "Problems archiving transformation %s: %s" % ( transDict['TransformationID'],
                                                                       res['Message'] ) )
    else:
      self.log.error( "Could not get the transformations" )

    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    ''' get the directories for the supplied transformation from the transformation system

    :param self: self reference
    :param int transID: transformation ID
    '''
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        self.log.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      transDirectories = res['Value'].splitlines()
      directories = self._addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.metadataClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        self.log.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )

    if not directories:
      self.log.info( "No output directories found" )
    directories = sortList( directories )
    return S_OK( directories )

  @classmethod
  def _addDirs( self, transID, newDirs, existingDirs ):
    ''' append uniqe :newDirs: list to :existingDirs: list

    :param self: self reference
    :param int transID: transformationID
    :param list newDirs: src list of paths
    :param list existingDirs: dest list of paths
    '''
    for folder in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, str( folder ) ):
        if not folder in existingDirs:
          existingDirs.append( folder )
    return existingDirs

  #############################################################################
  #
  # These are the methods for performing the cleaning of catalogs and storage
  #

  def cleanStorageContents( self, directory ):
    ''' delete lfn dir from all active SE

    :param self: self reference
    :param sre directory: folder name
    '''
    for storageElement in self.activeStorages:
      res = self.__removeStorageDirectory( directory, storageElement )
      if not res['OK']:
        return res
    return S_OK()

  def __removeStorageDirectory( self, directory, storageElement ):
    ''' wipe out all contents from :directory: at :storageElement:

    :param self: self reference
    :param str directory: path
    :param str storageElement: SE name
    '''
    self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )
    res = self.replicaManager.getPfnForLfn( [directory], storageElement )
    if not res['OK']:
      self.log.error( "Failed to get PFN for directory", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      self.log.error( 'Failed to obtain directory PFN from LFN', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectory = res['Value']['Successful'].values()[0]
    res = self.replicaManager.getStorageFileExists( storageDirectory, storageElement, singleFile = True )
    if not res['OK']:
      self.log.error( "Failed to obtain existance of directory", res['Message'] )
      return res
    exists = res['Value']
    if not exists:
      self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
      return S_OK()
    res = self.replicaManager.removeStorageDirectory( storageDirectory,
                                                      storageElement,
                                                      recursive = True,
                                                      singleDirectory = True )
    if not res['OK']:
      self.log.error( "Failed to remove storage directory", res['Message'] )
      return res
    self.log.info( "Successfully removed %d files from %s at %s" % ( res['Value']['FilesRemoved'],
                                                                     directory,
                                                                     storageElement ) )
    return S_OK()

  def cleanCatalogContents( self, directory ):
    ''' wipe out everything from catalog under folder :directory:

    :param self: self reference
    :params str directory: folder name
    '''
    res = self.__getCatalogDirectoryContents( [directory] )
    if not res['OK']:
      return res
    filesFound = res['Value']
    if not filesFound:
      return S_OK()
    self.log.info( "Attempting to remove %d possible remnants from the catalog and storage" % len( filesFound ) )
    res = self.replicaManager.removeFile( filesFound )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in the catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the catalog" )
    return S_OK()

  def __getCatalogDirectoryContents( self, directories ):
    ''' get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    '''
    self.log.info( 'Obtaining the catalog contents for %d directories:' % len( directories ) )
    for directory in directories:
      self.log.info( directory )
    activeDirs = directories
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.replicaManager.getCatalogListDirectory( currentDir, singleFile = True )
      activeDirs.remove( currentDir )
      if not res['OK'] and res['Message'].endswith( 'The supplied path does not exist' ):
        self.log.info( "The supplied directory %s does not exist" % currentDir )
      elif not res['OK']:
        self.log.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Message'] ) )
      else:
        dirContents = res['Value']
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )
    self.log.info( "Found %d files" % len( allFiles ) )
    return S_OK( allFiles.keys() )

  def cleanTransformationLogFiles( self, directory ):
    ''' clean up transformation logs from directory :directory:

    :param self: self reference
    :param str directory: folder name
    '''
    self.log.info( "Removing log files found in the directory %s" % directory )
    res = self.replicaManager.removeStorageDirectory( directory, self.logSE, singleDirectory = True )
    if not res['OK']:
      self.log.error( "Failed to remove log files", res['Message'] )
      return res
    self.log.info( "Successfully removed transformation log directory" )
    return S_OK()

  #############################################################################
  #
  # These are the functional methods for archiving and cleaning transformations
  #

  def removeTransformationOutput( self, transID ):
    ''' This just removes any mention of the output data from the catalog and storage '''
    self.log.info( "Removing output data for transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    for directory in directories:
      if not re.search( '/LOG/', directory ):
        res = self.cleanCatalogContents( directory )
        if not res['OK']:
          return res
        res = self.cleanStorageContents( directory )
        if not res['OK']:
          return res
    self.log.info( "Removed directories in the catalog and storage for transformation" )
    # Clean ALL the possible remnants found in the metadata catalog
    res = self.cleanMetadataCatalogFiles( transID, directories )
    if not res['OK']:
      return res
    self.log.info( "Successfully removed output of transformation %d" % transID )
    # Change the status of the transformation to RemovedFiles
    res = self.transClient.setTransformationParameter( transID, 'Status', 'RemovedFiles' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to RemovedFiles" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to RemovedFiles" % ( transID ) )
    return S_OK()

  def archiveTransformation( self, transID ):
    ''' This just removes job from the jobDB and the transformation DB

    :param self: self reference
    :param int transID: transformation ID
    '''
    self.log.info( "Archiving transformation %s" % transID )
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully archived transformation %d" % transID )
    # Change the status of the transformation to archived
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Archived' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Archived" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Archived" % ( transID ) )
    return S_OK()

  def cleanTransformation( self, transID ):
    ''' This removes any mention of the supplied transformation
    '''
    self.log.info( "Cleaning transformation %s" % transID )
    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      self.log.error( 'Problem obtaining directories for transformation %s with result "%s"' % ( transID, res ) )
      return S_OK()
    directories = res['Value']
    # Clean the jobs in the WMS and any failover requests found
    res = self.cleanTransformationTasks( transID )
    if not res['OK']:
      return res
    # Clean the log files for the jobs
    for directory in directories:
      if re.search( '/LOG/', directory ):
        res = self.cleanTransformationLogFiles( directory )
        if not res['OK']:
          return res
      res = self.cleanCatalogContents( directory )
      if not res['OK']:
        return res
      res = self.cleanStorageContents( directory )
      if not res['OK']:
        return res
    # Clean ALL the possible remnants found in the BK
    res = self.cleanMetadataCatalogFiles( transID, directories )
    if not res['OK']:
      return res
    # Clean the transformation DB of the files and job information
    res = self.transClient.cleanTransformation( transID )
    if not res['OK']:
      return res
    self.log.info( "Successfully cleaned transformation %d" % transID )
    # Change the status of the transformation to deleted
    res = self.transClient.setTransformationParameter( transID, 'Status', 'Deleted' )
    if not res['OK']:
      self.log.error( "Failed to update status of transformation %s to Deleted" % ( transID ), res['Message'] )
      return res
    self.log.info( "Updated status of transformation %s to Deleted" % ( transID ) )
    return S_OK()

  def cleanMetadataCatalogFiles( self, transID ):
    ''' wipe out files from catalog '''
    res = self.metadataClient.findFilesByMetadata( { self.transfidmeta : transID } )
    if not res['OK']:
      return res
    fileToRemove = res['Value']
    if not fileToRemove:
      self.log.info( 'No files found for transID %s' % transID )
      return S_OK()
    res = self.replicaManager.removeFile( fileToRemove )
    if not res['OK']:
      return res
    for lfn, reason in res['Value']['Failed'].items():
      self.log.error( "Failed to remove file found in metadata catalog", "%s %s" % ( lfn, reason ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to remove all files found in the metadata catalog" )
    self.log.info( "Successfully removed all files found in the BK" )
    return S_OK()

  #############################################################################
  #
  # These are the methods for removing the jobs from the WMS and transformation DB
  #

  def cleanTransformationTasks( self, transID ):
    ''' clean tasks from WMS
    '''
    res = self.__getTransformationExternalIDs( transID )
    if not res['OK']:
      return res
    externalIDs = res['Value']
    if externalIDs:
      res = self.transClient.getTransformationParameters( transID, ['Type'] )
      if not res['OK']:
        self.log.error( "Failed to determine transformation type" )
        return res
      transType = res['Value']
      if transType in [ 'Replication', 'Removal' ]:
        res = self.__removeRequests( externalIDs )
      else:
        res = self.__removeWMSTasks( externalIDs )
      if not res['OK']:
        return res
    return S_OK()

  def __getTransformationExternalIDs( self, transID ):
    ''' collect all ExternalIDs for transformation :transID:

    :param self: self reference
    :param int transID: transforamtion ID
    '''
    res = self.transClient.getTransformationTasks( condDict = { 'TransformationID' : transID } )
    if not res['OK']:
      self.log.error( "Failed to get externalIDs for transformation %d" % transID, res['Message'] )
      return res
    externalIDs = [ taskDict['ExternalID'] for taskDict in res["Value"] ]
    self.log.info( "Found %d tasks for transformation" % len( externalIDs ) )
    return S_OK( externalIDs )

  def __removeRequests( self, requestIDs ):
    ''' dummy method '''
    self.log.error( "Not removing requests but should do" )
    return S_OK()

  def __removeWMSTasks( self, transJobIDs ):
    ''' wipe out jobs and their requests from the system

    TODO: should check request status, maybe FTS files as well ???

    :param self: self reference
    :param list trasnJobIDs: job IDs
    '''
    # Prevent 0 job IDs
    jobIDs = [ int( j ) for j in transJobIDs if int( j ) ]
    allRemove = True
    for jobList in breakListIntoChunks( jobIDs, 500 ):

      res = self.wmsClient.killJob( jobList )
      if res['OK']:
        self.log.info( "Successfully killed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to kill %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

      res = self.wmsClient.deleteJob( jobList )
      if res['OK']:
        self.log.info( "Successfully removed %d jobs from WMS" % len( jobList ) )
      elif ( "InvalidJobIDs" in res ) and ( "NonauthorizedJobIDs" not in res ) and ( "FailedJobIDs" not in res ):
        self.log.info( "Found %s jobs which did not exist in the WMS" % len( res['InvalidJobIDs'] ) )
      elif "NonauthorizedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs because not authorized" % len( res['NonauthorizedJobIDs'] ) )
        allRemove = False
      elif "FailedJobIDs" in res:
        self.log.error( "Failed to remove %s jobs" % len( res['FailedJobIDs'] ) )
        allRemove = False

    if not allRemove:
      return S_ERROR( "Failed to remove all remnants from WMS" )
    self.log.info( "Successfully removed all tasks from the WMS" )

    if not jobIDs:
      self.log.info( "JobIDs not present, unable to remove asociated requests." )
      return S_OK()

    res = self.requestClient.getRequestForJobs( jobIDs )
    if not res['OK']:
      self.log.error( "Failed to get requestID for jobs.", res['Message'] )
      return res
    failoverRequests = res['Value']
    self.log.info( "Found %d jobs with associated failover requests" % len( failoverRequests ) )
    if not failoverRequests:
      return S_OK()
    failed = 0
    for jobID, requestName in failoverRequests.items():
      # Put this check just in case, tasks must have associated jobs
      if jobID == 0 or jobID == '0':
        continue
      res = self.requestClient.deleteRequest( requestName )
      if not res['OK']:
        self.log.error( "Failed to remove request from RequestDB", res['Message'] )
        failed += 1
      else:
        self.log.verbose( "Removed request %s associated to job %d." % ( requestName, jobID ) )
    if failed:
      self.log.info( "Successfully removed %s requests" % ( len( failoverRequests ) - failed ) )
      self.log.info( "Failed to remove %s requests" % failed )
      return S_ERROR( "Failed to remove all the request from RequestDB" )
    self.log.info( "Successfully removed all the associated failover requests" )
    return S_OK()
Exemplo n.º 23
0
class ProductionJob(Job): #pylint: disable=too-many-public-methods, too-many-instance-attributes
  """ Production job class. Suitable for CLIC studies. Need to sub class and overload for other clients.
  """
  def __init__(self, script = None):
    super(ProductionJob, self).__init__( script )
    self.prodVersion = __RCSID__
    self.dryrun = False
    self.created = False
    self.checked = False
    self.call_finalization = False
    self.finalsdict = {}
    self.transfid = 0
    self.type = 'Production'
    self.csSection = '/Production/Defaults'
    self.ops = Operations()
    self.fc = FileCatalogClient()
    self.trc = TransformationClient()
    self.defaultProdID = '12345'
    self.defaultProdJobID = '12345'
    self.jobFileGroupSize = 1
    self.nbtasks = 1
    self.slicesize =0
    self.basename = ''
    self.basepath = self.ops.getValue('/Production/CLIC/BasePath','/ilc/prod/clic/')
    self.evttype = ''
    self.datatype = ''
    self.energycat = ''
    self.detector = ''
    self.currtrans = None
    self.description = ''

    self.finalpaths = []
    self.finalMetaDict = defaultdict( dict )
    self.prodMetaDict = {}
    self.finalMetaDictNonSearch = {}
    self.metadict_external = {}
    self.outputStorage = ''

    self.proxyinfo = getProxyInfo()

    self.inputdataquery = False
    self.inputBKSelection = {}
    self.plugin = 'Standard'
    self.prodGroup = ''

    self.prodTypes = ['MCGeneration', 'MCSimulation', 'Test', 'MCReconstruction',
                      'MCReconstruction_Overlay', 'Merge', 'Split',
                      'MCGeneration_ILD',
                      'MCSimulation_ILD',
                      'MCReconstruction_ILD',
                      'MCReconstruction_Overlay_ILD',
                      'Split_ILD'
                     ]
    self.prodparameters = {}
    self.prodparameters['NbInputFiles'] = 1
    self.prodparameters['nbevts']  = 0 
    #self.prodparameters["SWPackages"] = ''
    self._addParameter(self.workflow, "IS_PROD", 'JDL', True, "This job is a production job")
    if not script:
      self.__setDefaults()

    self._recBasePaths = {}
    self.maxFCFoldersToCheck = 100000

  #############################################################################
  def __setDefaults(self):
    """Sets some default parameters.
    """
    self.setPlatform(self.ops.getValue('%s/Platform' % (self.csSection), 'x86_64-slc5-gcc43-opt'))
    self.setCPUTime('300000')
    self.setLogLevel('verbose')
    self.setJobGroup('@{PRODUCTION_ID}')

    #version control
    self._setParameter('productionVersion', 'string', self.prodVersion, 'ProdAPIVersion')

    #General workflow parameters
    self._setParameter('PRODUCTION_ID',     'string', self.defaultProdID.zfill(8), 'ProductionID')
    self._setParameter('JOB_ID',            'string', self.defaultProdJobID.zfill(8), 'ProductionJobID')
    self._setParameter('Priority',             'JDL',                     '1', 'Priority')
    self._setParameter('emailAddress',      'string', '*****@*****.**', 'CrashEmailAddress')

  def _setParameter(self, name, parameterType, parameterValue, description):
    """Set parameters checking in CS in case some defaults need to be changed.
    """
    if self.ops.getValue('%s/%s' % (self.csSection, name), ''):
      LOG.debug('Setting %s from CS defaults = %s' % (name, self.ops.getValue('%s/%s' % (self.csSection, name))))
      self._addParameter(self.workflow, name, parameterType, self.ops.getValue('%s/%s' % (self.csSection, name), 
                                                                               'default'), description)
    else:
      LOG.debug('Setting parameter %s = %s' % (name, parameterValue))
      self._addParameter(self.workflow, name, parameterType, parameterValue, description)
  
  def setConfig(self,version):
    """ Define the Configuration package to obtain
    """
    appName = 'ILDConfig'
    self._addSoftware(appName.lower(), version)
    self.prodparameters['ILDConfigVersion'] = version
    self._addParameter( self.workflow, 'ILDConfigPackage', 'JDL', appName+version, 'ILDConfig package' )
    return S_OK()  

  def setClicConfig(self, version):
    """Define the ClicConfig package to obtain."""
    return self.setConfigPackage('ClicConfig', version)

  def setConfigPackage(self, appName, version):
    """Define the config package to obtain."""
    self._addSoftware(appName.lower(), version)
    self._addParameter(self.workflow, appName + 'Package', 'JDL', appName + version, appName + 'package')
    self.prodparameters[appName + 'Version'] = version
    return S_OK()

  def setDryRun(self, run):
    """ In case one wants to get all the info as if the prod was being submitted
    """
    self.dryrun = run
      
  #############################################################################
  def setProdGroup(self, group):
    """ Sets a user defined tag for the production as appears on the monitoring page
    """
    self.prodGroup = group
  #############################################################################
  def setProdPlugin(self, plugin):
    """ Sets the plugin to be used to creating the production jobs
    """
    self.plugin = plugin
    
  #############################################################################
  def setJobFileGroupSize(self, files):
    """ Sets the number of files to be input to each job created.
    """
    if self.checked:
      return self._reportError("This input is needed at the beginning of the production definition: it is \
      needed for total number of evts.")
    self.jobFileGroupSize = files
    self.prodparameters['NbInputFiles'] = files
    
  def setNbEvtsPerSlice(self,nbevts):
    """ Define the number of events in a slice.
    """
    self.slicesize = nbevts
    
  #############################################################################
  def setProdType(self, prodType):
    """Set prod type.
    """
    if prodType not in self.prodTypes:
      raise TypeError('Prod must be one of %s' % (', '.join(self.prodTypes)))
    self.setType(prodType)
  #############################################################################
  def setWorkflowName(self, name):
    """Set workflow name.
    """
    self.workflow.setName(name)
    self.name = name

  #############################################################################
  def setWorkflowDescription(self, desc):
    """Set workflow name.
    """
    self.workflow.setDescription(desc)
             
  #############################################################################
  def createWorkflow(self):
    """ Create XML for local testing.
    """
    name = '%s.xml' % self.name
    if os.path.exists(name):
      shutil.move(name,'%s.backup' % name)
    self.workflow.toXMLFile(name)
    
  #############################################################################
  def setOutputSE(self, outputse):
    """ Define where the output file(s) will go. 
    """
    self.outputStorage = outputse
    return S_OK()
  
  #############################################################################
  def setInputDataQuery(self, metadata):
    """ Define the input data query needed
    """

    retMetaKey = self._checkMetaKeys( metadata.keys() )
    if not retMetaKey['OK']:
      return retMetaKey

    if "ProdID" not in metadata:
      return self._reportError("Input metadata dictionary must contain at least a key 'ProdID' as reference")
    retDirs = self._checkFindDirectories( metadata )
    if not retDirs['OK']:
      return retDirs
    dirs = retDirs['Value'].values()
    for mdir in dirs[:self.maxFCFoldersToCheck]:
      LOG.notice("Directory: %s" % mdir)
      res = self.fc.getDirectoryUserMetadata(mdir)
      if not res['OK']:
        return self._reportError("Error looking up the catalog for directory metadata")
      compatmeta = res['Value']
      compatmeta.update(metadata)

    if 'EvtType' in compatmeta:
      self.evttype = JobHelpers.getValue( compatmeta['EvtType'], str, basestring )
    else:
      return self._reportError("EvtType is not in the metadata, it has to be!")

    if 'NumberOfEvents' in compatmeta:
      self.nbevts = JobHelpers.getValue( compatmeta['NumberOfEvents'], int, None )

    self.basename = self.evttype
    LOG.notice("MetaData: %s" % compatmeta)
    LOG.notice("MetaData: %s" % metadata)
    if "Energy" in compatmeta:
      self.energycat = JobHelpers.getValue( compatmeta["Energy"], str, (int, long, basestring) )
        
    if self.energycat.count("tev"):
      self.energy = Decimal("1000.") * Decimal(self.energycat.split("tev")[0])
    elif self.energycat.count("gev"):
      self.energy = Decimal("1.") * Decimal(self.energycat.split("gev")[0])
    else:
      self.energy = Decimal("1.") * Decimal(self.energycat)
    gendata = False
    if 'Datatype' in compatmeta:
      self.datatype = JobHelpers.getValue( compatmeta['Datatype'], str, basestring )
      if self.datatype == 'gen':
        gendata = True
    if "DetectorType" in compatmeta and not gendata:
      self.detector = JobHelpers.getValue( compatmeta["DetectorType"], str, basestring )
    self.inputBKSelection = metadata
    self.inputdataquery = True
    
    self.prodparameters['nbevts'] = self.nbevts 
    self.prodparameters["FCInputQuery"] = self.inputBKSelection

    return S_OK()

  def setDescription(self, desc):
    """ Set the production's description
    
    :param str desc: Description
    """
    self.description = desc
    return S_OK()

  def getBasePath(self):
    """ Return the base path. Updated by :any:`setInputDataQuery`.
    """
    return self.basepath
  
  def addFinalization(self, uploadData = False, registerData = False, uploadLog = False, sendFailover=False):
    """ Add finalization step

    :param bool uploadData: Upload or not the data to the storage
    :param bool uploadLog: Upload log file to storage (currently only available for admins, thus add them to OutputSandbox)
    :param bool sendFailover: Send Failover requests, and declare files as processed or unused in transfDB
    :param bool registerData: Register data in the file catalog
    """
    #TODO: Do the registration only once, instead of once for each job
    
    self.call_finalization = True
    self.finalsdict['uploadData'] = uploadData
    self.finalsdict['registerData'] = registerData
    self.finalsdict['uploadLog'] = uploadLog
    self.finalsdict['sendFailover'] = sendFailover

  def _addRealFinalization(self):  
    """ This is called at creation: now that the workflow is created at the last minute,
    we need to add this also at the last minute
    """
    importLine = 'from ILCDIRAC.Workflow.Modules.<MODULE> import <MODULE>'
    
    dataUpload = ModuleDefinition('UploadOutputData')
    dataUpload.setDescription('Uploads the output data')
    self._addParameter(dataUpload, 'enable', 'bool', False, 'EnableFlag')
    body = importLine.replace('<MODULE>', 'UploadOutputData')
    dataUpload.setBody(body)

    failoverRequest = ModuleDefinition('FailoverRequest')
    failoverRequest.setDescription('Sends any failover requests')
    self._addParameter(failoverRequest, 'enable', 'bool', False, 'EnableFlag')
    body = importLine.replace('<MODULE>', 'FailoverRequest')
    failoverRequest.setBody(body)

    registerdata = ModuleDefinition('RegisterOutputData')
    registerdata.setDescription('Module to add in the metadata catalog the relevant info about the files')
    self._addParameter(registerdata, 'enable', 'bool', False, 'EnableFlag')
    body = importLine.replace('<MODULE>', 'RegisterOutputData')
    registerdata.setBody(body)

    logUpload = ModuleDefinition('UploadLogFile')
    logUpload.setDescription('Uploads the output log files')
    self._addParameter(logUpload, 'enable', 'bool', False, 'EnableFlag')
    body = importLine.replace('<MODULE>', 'UploadLogFile')
    logUpload.setBody(body)

    errorReport = ModuleDefinition('ReportErrors')
    errorReport.setDescription('Reports errors at the end')
    body = importLine.replace('<MODULE>', 'ReportErrors')
    errorReport.setBody(body)

    finalization = StepDefinition('Job_Finalization')
    finalization.addModule(dataUpload)
    up = finalization.createModuleInstance('UploadOutputData', 'dataUpload')
    up.setValue("enable", self.finalsdict['uploadData'])

    finalization.addModule(registerdata)
    ro = finalization.createModuleInstance('RegisterOutputData', 'RegisterOutputData')
    ro.setValue("enable", self.finalsdict['registerData'])

    finalization.addModule(logUpload)
    ul  = finalization.createModuleInstance('UploadLogFile', 'logUpload')
    ul.setValue("enable", self.finalsdict['uploadLog'])

    finalization.addModule(failoverRequest)
    fr = finalization.createModuleInstance('FailoverRequest', 'failoverRequest')
    fr.setValue("enable", self.finalsdict['sendFailover'])

    finalization.addModule(errorReport)
    fr = finalization.createModuleInstance('ReportErrors', 'reportErrors')

    self.workflow.addStep(finalization)
    self.workflow.createStepInstance('Job_Finalization', 'finalization')

    return S_OK()
  
  def createProduction(self, name = None):
    """ Create production.
    """

    if not self.proxyinfo['OK']:
      return S_ERROR("Not allowed to create production, you need a production proxy.")
    if 'groupProperties' not in self.proxyinfo['Value']:
      return S_ERROR("Could not determine groupProperties, you do not have the right proxy.")
    groupProperties = self.proxyinfo['Value']['groupProperties']
    if 'ProductionManagement' not in groupProperties:
      return S_ERROR("Not allowed to create production, you need a production proxy.")

    if self.created:
      return S_ERROR("Production already created.")

    ###We need to add the applications to the workflow
    res = self._addToWorkflow()
    if not res['OK']:
      return res
    if self.call_finalization:
      self._addRealFinalization()
    
    workflowName = self.workflow.getName()
    fileName = '%s.xml' % workflowName
    LOG.verbose('Workflow XML file name is:', '%s' % fileName)
    try:
      self.createWorkflow()
    except Exception as x:
      LOG.error("Exception creating workflow", repr(x))
      return S_ERROR('Could not create workflow')
    with open(fileName, 'r') as oFile:
      workflowXML = oFile.read()
    if not name:
      name = workflowName

    res = self.trc.getTransformationStats(name)
    if res['OK']:
      return self._reportError("Transformation with name %s already exists! Cannot proceed." % name)
    
    ###Create Tranformation
    Trans = Transformation()
    Trans.setTransformationName(name)
    Trans.setDescription(self.description)
    Trans.setLongDescription(self.description)
    Trans.setType(self.type)
    self.prodparameters['JobType'] = self.type
    Trans.setPlugin(self.plugin)
    if self.inputdataquery:
      Trans.setGroupSize(self.jobFileGroupSize)
    Trans.setTransformationGroup(self.prodGroup)
    Trans.setBody(workflowXML)
    if not self.slicesize:
      Trans.setEventsPerTask(self.jobFileGroupSize * self.nbevts)
    else:
      Trans.setEventsPerTask(self.slicesize)
    self.currtrans = Trans
    if self.dryrun:
      LOG.notice('Would create prod called', name)
      self.transfid = 12345
    else: 
      res = Trans.addTransformation()
      if not res['OK']:
        LOG.error(res['Message'])
        return res
      self.transfid = Trans.getTransformationID()['Value']

    if self.inputBKSelection:
      res = self.applyInputDataQuery()
    if not self.dryrun:
      Trans.setAgentType("Automatic")  
      Trans.setStatus("Active")
    
    finals = []
    for finalpaths in self.finalpaths:
      finalpaths = finalpaths.rstrip("/")
      finalpaths += "/"+str(self.transfid).zfill(8)
      finals.append(finalpaths)
      self.finalMetaDict[finalpaths].update( { "ProdID": self.transfid } )
      self.finalMetaDict[finalpaths].update( self.prodMetaDict )
      # if 'ILDConfigVersion' in self.prodparameters:
      #   self.finalMetaDict[finalpaths].update({"ILDConfig":self.prodparameters['ILDConfigVersion']})
        
      if self.nbevts:
        self.finalMetaDict[finalpaths].update({'NumberOfEvents' : self.jobFileGroupSize * self.nbevts})
    self.finalpaths = finals
    self.created = True
    
    return S_OK()

  def setNbOfTasks(self, nbtasks):
    """ Define the number of tasks you want. Useful for generation jobs.
    """
    if not self.currtrans:
      LOG.error("Not transformation defined earlier")
      return S_ERROR("No transformation defined")
    if self.inputBKSelection and self.plugin not in  ['Limited', 'SlicedLimited']:
      LOG.error('Metadata selection activated, should not specify the number of jobs')
      return S_ERROR()
    self.nbtasks = nbtasks
    self.currtrans.setMaxNumberOfTasks(self.nbtasks) #pylint: disable=E1101
    return S_OK()

  def applyInputDataQuery(self, metadata = None, prodid = None):
    """ Tell the production to update itself using the metadata query specified, i.e. submit new jobs if new files 
    are added corresponding to same query.
    """
    if not self.transfid and self.currtrans:
      self.transfid = self.currtrans.getTransformationID()['Value'] #pylint: disable=E1101
    elif prodid:
      self.transfid = prodid
    if not self.transfid:
      LOG.error("Not transformation defined earlier")
      return S_ERROR("No transformation defined")
    if metadata:
      self.inputBKSelection = metadata

    if not self.dryrun:
      res = self.trc.createTransformationInputDataQuery(self.transfid, self.inputBKSelection)
      if not res['OK']:
        return res
    else:
      LOG.notice("Would use %s as metadata query for production" % str(self.inputBKSelection))
    return S_OK()
  
  def addMetadataToFinalFiles(self, metadict):
    """ Add additionnal non-query metadata 
    """
    self.metadict_external = metadict
    
    return S_OK()
  
  def finalizeProd(self, prodid = None, prodinfo = None):
    """ Finalize definition: submit to Transformation service and register metadata
    """
    currtrans = 0
    if self.currtrans:
      if not self.dryrun:
        currtrans = self.currtrans.getTransformationID()['Value'] #pylint: disable=E1101
      else:
        currtrans = 12345
    if prodid:
      currtrans = prodid
    if not currtrans:
      LOG.error("Not transformation defined earlier")
      return S_ERROR("No transformation defined")
    if prodinfo:
      self.prodparameters = prodinfo
      
    info = []
    info.append('%s Production %s has following parameters:\n' % (self.prodparameters['JobType'], currtrans))
    if "Process" in self.prodparameters:
      info.append('- Process %s' % self.prodparameters['Process'])
    if "Energy" in self.prodparameters:
      info.append('- Energy %s GeV' % self.prodparameters["Energy"])

    if not self.slicesize:
      self.prodparameters['nbevts'] = self.jobFileGroupSize * self.nbevts
    else:
      self.prodparameters['nbevts'] = self.slicesize
    if self.prodparameters['nbevts']:
      info.append("- %s events per job" % (self.prodparameters['nbevts']))
    if self.prodparameters.get('lumi', False):
      info.append('    corresponding to a luminosity %s fb' % (self.prodparameters['lumi'] * \
                                                               self.prodparameters['NbInputFiles']))
    if 'FCInputQuery' in self.prodparameters:
      info.append('Using InputDataQuery :')
      for key, val in self.prodparameters['FCInputQuery'].iteritems():
        info.append('    %s = %s' % (key, val))
    if "SWPackages" in self.prodparameters:
      info.append('- SW packages %s' % self.prodparameters["SWPackages"])
    if "SoftwareTag" in self.prodparameters:
      info.append('- SW tags %s' % self.prodparameters["SoftwareTag"])
    if "ILDConfigVersion" in self.prodparameters:
      info.append('- ILDConfig %s' % self.prodparameters['ILDConfigVersion'])  

    if 'ClicConfigVersion' in self.prodparameters:
      info.append('- ClicConfig %s' % self.prodparameters['ClicConfigVersion'] )

    if 'extraCLIArguments' in self.prodparameters:
      info.append('- ExtraCLIArguments %s' % self.prodparameters['extraCLIArguments'] )

    # as this is the very last call all applications are registered, so all software packages are known
    #add them the the metadata registration
    for finalpath in self.finalpaths:
      if finalpath not in self.finalMetaDictNonSearch:
        self.finalMetaDictNonSearch[finalpath] = {}
      if "SWPackages" in self.prodparameters:
        self.finalMetaDictNonSearch[finalpath]["SWPackages"] = self.prodparameters["SWPackages"]
        
      if self.metadict_external:
        self.finalMetaDictNonSearch[finalpath].update(self.metadict_external)  
    
    info.append('- Registered metadata: ')
    for path, metadata in sorted( self.finalMetaDict.iteritems() ):
      info.append('    %s = %s' % (path, metadata))
    info.append('- Registered non searchable metadata: ')
    for path, metadata in sorted( self.finalMetaDictNonSearch.iteritems() ):
      info.append('    %s = %s' % (path, metadata))

    infoString = '\n'.join(info)
    self.prodparameters['DetailedInfo'] = infoString
    
    for name, val in self.prodparameters.iteritems():
      result = self._setProdParameter(currtrans, name, val)
      if not result['OK']:
        LOG.error(result['Message'])

    res = self._registerMetadata()
    if not res['OK']:
      LOG.error('Could not register the following directories:', res['Message'])
      return res
    return S_OK()

  def _createDirectory(self, path, failed, mode=0o775):
    """Create the directory at path if it does not exist.

    :param str path: path to check
    :param list failed: list of failed paths
    :param int mode: mode to set for directory
    """
    exists = returnSingleResult(self.fc.isDirectory(path))
    if exists['OK'] and exists['Value']:
      LOG.verbose('Directory already exists:', path)
      return S_OK()
    result = returnSingleResult(self.fc.createDirectory(path))
    if not result['OK']:
      LOG.error('Failed to create directory:', '%s: %s' % (path, result['Message']))
      failed[path].append(result['Message'])
      return S_ERROR()
    LOG.verbose('Successfully created directory:', path)
    res = self.fc.changePathMode({path: mode}, False)
    if not res['OK']:
      LOG.error(res['Message'])
      failed[path].append(res['Message'])
      return S_ERROR()
    LOG.verbose('Successfully changed mode:', path)
    return S_OK()

  def _checkMetadata(self, path, metaCopy):
    """Get existing metadata, if it is the same do not set it again, otherwise return error."""
    existingMetadata = self.fc.getDirectoryUserMetadata(path.rstrip('/'))
    if not existingMetadata['OK']:
      return S_OK()
    failure = False
    for key, value in existingMetadata['Value'].iteritems():
      if key in metaCopy and metaCopy[key] != value:
        LOG.error('Metadata values for folder %s disagree for key %s: Existing(%r), new(%r)' %
                  (path, key, value, metaCopy[key]))
        failure = True
      elif key in metaCopy and metaCopy[key] == value:
        LOG.verbose('Meta entry is unchanged', '%s = %s' % (key, value))
        metaCopy.pop(key, None)
    if failure:
      return S_ERROR('Error when setting new metadata, already existing metadata disagrees!')
    return S_OK()

  def _registerMetadata(self):
    """Set metadata for given folders.

    Register path and metadata before the production actually runs. This allows for the definition
    of the full chain in 1 go.
    """
    prevent_registration = self.ops.getValue('Production/PreventMetadataRegistration', False)

    if self.dryrun or prevent_registration:
      LOG.notice('Would have created and registered the following\n',
                 '\n '.join([' * %s: %s' % (fPath, val) for fPath, val in self.finalMetaDict.iteritems()]))
      LOG.notice('Would have set this as non searchable metadata', str(self.finalMetaDictNonSearch))
      return S_OK()

    failed = defaultdict(list)
    for path, meta in sorted(self.finalMetaDict.items()):
      res = self._createDirectory(path, failed)
      if not res['OK']:
        continue
      LOG.verbose('Checking to set metadata:', meta)
      metaCopy = dict(meta)
      res = self._checkMetadata(path, metaCopy)
      if not res['OK']:
        return res
      if not metaCopy:
        LOG.verbose('No new metadata to set')
        continue

      LOG.verbose('Setting metadata information: ', '%s: %s' % (path, metaCopy))
      result = self.fc.setMetadata(path.rstrip('/'), metaCopy)
      if not result['OK']:
        LOG.error('Could not preset metadata', str(metaCopy))
        LOG.error('Could not preset metadata', result['Message'])
        failed[path].append(result['Message'])

    for path, meta in sorted(self.finalMetaDictNonSearch.items()):
      res = self._createDirectory(path, failed)
      if not res['OK']:
        continue
      LOG.verbose('Setting non searchable metadata information: ', '%s: %s' % (path, meta))
      result = self.fc.setMetadata(path.rstrip('/'), meta)
      if not result['OK']:
        LOG.error('Could not preset non searchable metadata', str(meta))
        LOG.error('Could not preset non searchable metadata', result['Message'])
        failed[path].append(result['Message'])

    if failed:
      return S_ERROR('Failed to register some metadata: %s' % dict(failed))
    return S_OK()

  def getMetadata(self):
    """ Return the corresponding metadata of the last step
    """
    metadict = {}
    for meta in self.finalMetaDict.values():
      metadict.update(meta)
    if 'NumberOfEvents' in metadict:
      del metadict['NumberOfEvents'] #As this is not supposed to be a searchable thing
    return metadict
  
  def _setProdParameter(self, prodID, pname, pvalue):
    """ Set a production parameter.
    """
    if isinstance( pvalue, list ):
      pvalue = '\n'.join(pvalue)

    if isinstance( pvalue, (int, long) ):
      pvalue = str(pvalue)
    if not self.dryrun:  
      result = self.trc.setTransformationParameter(int(prodID), str(pname), str(pvalue))
      if not result['OK']:
        LOG.error('Problem setting parameter %s for production %s and value:\n%s' % (prodID, pname, pvalue))
    else:
      LOG.notice("Adding %s=%s to transformation" % (str(pname), str(pvalue)))
      result = S_OK()
    return result
  
  def _jobSpecificParams(self, application):
    """ For production additional checks are needed: ask the user
    """

    if self.created:
      return S_ERROR("The production was created, you cannot add new applications to the job.")

    if not application.logFile:
      logf = application.appname + "_" + application.version + "_@{STEP_ID}.log"
      res = application.setLogFile(logf)
      if not res['OK']:
        return res
      
      #in fact a bit more tricky as the log files have the prodID and jobID in them
    
    ### Retrieve from the application the essential info to build the prod info.
    if not self.nbevts and not self.slicesize:
      self.nbevts = application.numberOfEvents
      if not self.nbevts:
        return S_ERROR("Number of events to process is not defined.")
    elif not application.numberOfEvents:
      if not self.slicesize:
        res = application.setNumberOfEvents(self.jobFileGroupSize * self.nbevts)
      else:
        res = application.setNumberOfEvents(self.slicesize)
      if not res['OK']:
        return res
    
    if application.numberOfEvents > 0 and (self.jobFileGroupSize * self.nbevts > application.numberOfEvents or self.slicesize > application.numberOfEvents):
      self.nbevts = application.numberOfEvents

    
    if not self.energy:
      if application.energy:
        self.energy = Decimal((("%1.2f" % float(application.energy)).rstrip('0').rstrip('.')))
      else:
        return S_ERROR("Could not find the energy defined, it is needed for the production definition.")
    elif not application.energy:
      res = application.setEnergy(float(self.energy))
      if not res['OK']:
        return res
    if self.energy:
      self._setParameter( "Energy", "float", float(self.energy), "Energy used")      
      self.prodparameters["Energy"] = float(self.energy)
      
    if not self.evttype:
      if hasattr(application, 'eventType'):
        self.evttype = application.eventType
      else:
        return S_ERROR("Event type not found nor specified, it's mandatory for the production paths.")  
      self.prodparameters['Process'] = self.evttype
      
    if not self.outputStorage:
      return S_ERROR("You need to specify the Output storage element")
    
    curpackage = "%s.%s" % (application.appname, application.version)
    if "SWPackages" in self.prodparameters:      
      if not self.prodparameters["SWPackages"].count(curpackage):
        self.prodparameters["SWPackages"] += ";%s" % ( curpackage )    
    else :
      self.prodparameters["SWPackages"] = "%s" % (curpackage)
    
    if not application.accountInProduction:
      res = self._updateProdParameters(application)
      if not res['OK']:
        return res  
      self.checked = True

      return S_OK()
    
    res = application.setOutputSE(self.outputStorage)
    if not res['OK']:
      return res
    
    energypath = self.getEnergyPath()

    if not self.basename:
      self.basename = self.evttype

    evttypepath = ''
    if not self.evttype[-1] == '/':
      evttypepath = self.evttype + '/'
    
    path = self.basepath  
    ###Need to resolve file names and paths
    if self.energy:
      self.finalMetaDict[self.basepath + energypath] = {"Energy":str(self.energy)}

    if hasattr(application, "setOutputRecFile") and not application.willBeCut:
      evtPath = self.basepath + energypath + evttypepath
      self.finalMetaDict[evtPath] = {'EvtType': self.evttype}
      detPath = evtPath + application.detectortype
      self.finalMetaDict[detPath] = {'DetectorType': application.detectortype}
      if application.keepRecFile:
        path = self.basepath + energypath + evttypepath + application.detectortype + '/REC'
        self.finalMetaDict[path] = {'Datatype': 'REC'}
        fname = self.basename + '_rec.slcio'
        application.setOutputRecFile(fname, path)
        LOG.info('Will store the files under', path)
        self.finalpaths.append(path)
      path = self.basepath + energypath + evttypepath + application.detectortype + '/DST'
      self.finalMetaDict[path] = {'Datatype': 'DST'}
      fname = self.basename + '_dst.slcio'
      application.setOutputDstFile(fname, path)  
      LOG.info('Will store the files under', path)
      self.finalpaths.append(path)

    elif hasattr(application, "outputFile") and hasattr(application, 'datatype') and not application.outputFile and not application.willBeCut:
      path = self.basepath + energypath + evttypepath
      self.finalMetaDict[path] = {"EvtType" : self.evttype}
      if hasattr(application, "detectortype"):
        if application.detectortype:
          path += application.detectortype
          self.finalMetaDict[path] = {"DetectorType" : application.detectortype}
          path += '/'
        elif self.detector:
          path += self.detector
          self.finalMetaDict[path] = {"DetectorType" : self.detector}
          path += '/'
      if not application.datatype and self.datatype:
        application.datatype = self.datatype
      path += application.datatype
      self.finalMetaDict[path] = {'Datatype' : application.datatype}
      LOG.info("Will store the files under", "%s" % path)
      self.finalpaths.append(path)
      extension = 'stdhep'
      if application.datatype in ['SIM', 'REC']:
        extension = 'slcio'
      fname = self.basename + "_%s" % (application.datatype.lower()) + "." + extension
      application.setOutputFile(fname, path)  
      
    self.basepath = path

    res = self._updateProdParameters(application)
    if not res['OK']:
      return res
    
    self.checked = True
      
    return S_OK()

  def _updateProdParameters(self, application):
    """ Update the prod parameters stored in the production parameters visible from the web
    """
    try:
      self.prodparameters.update(application.prodparameters)
    except Exception as x:
      return S_ERROR("Exception: %r" % x )

    if hasattr( application, 'extraCLIArguments' ) and application.extraCLIArguments:
      self.prodparameters['extraCLIArguments'] = repr(application.extraCLIArguments)

    return S_OK()

  def _jobSpecificModules(self, application, step):
    return application._prodjobmodules(step)

  def getEnergyPath(self):
    """returns the energy path 250gev or 3tev or 1.4tev etc."""
    energy = Decimal(str(self.energy))
    tD = Decimal('1000.0')
    unit = 'gev' if energy < tD else 'tev'
    energy = energy if energy < tD else energy/tD
    energyPath = ("%1.2f" % energy).rstrip('0').rstrip('.')
    energyPath = energyPath+unit+'/'

    LOG.info("Energy path is: ", energyPath)
    return energyPath


  def _checkMetaKeys( self, metakeys, extendFileMeta=False ):
    """ check if metadata keys are allowed to be metadata

    :param list metakeys: metadata keys for production metadata
    :param bool extendFileMeta: also use FileMetaFields for checking meta keys
    :returns: S_OK, S_ERROR
    """

    res = self.fc.getMetadataFields()
    if not res['OK']:
      LOG.error("Could not contact File Catalog")
      return S_ERROR("Could not contact File Catalog")
    metaFCkeys = res['Value']['DirectoryMetaFields'].keys()
    if extendFileMeta:
      metaFCkeys.extend( res['Value']['FileMetaFields'].keys() )

    for key in metakeys:
      for meta in metaFCkeys:
        if meta != key and meta.lower() == key.lower():
          return self._reportError("Key syntax error %r, should be %r" % (key, meta), name = self.__class__.__name__)
      if key not in metaFCkeys:
        return self._reportError("Key %r not found in metadata keys, allowed are %r" % (key, metaFCkeys))

    return S_OK()

  def _checkFindDirectories( self, metadata ):
    """ find directories by metadata and check that there are directories found

    :param dict metadata: metadata dictionary
    :returns: S_OK, S_ERROR
    """

    res = self.fc.findDirectoriesByMetadata(metadata)
    if not res['OK']:
      return self._reportError("Error looking up the catalog for available directories")
    elif len(res['Value']) < 1:
      return self._reportError('Could not find any directories corresponding to the query issued')
    return res

  def setReconstructionBasePaths( self, recPath, dstPath ):
    """ set the output Base paths for the reconstruction REC and DST files """
    self._recBasePaths['REC'] = recPath
    self._recBasePaths['DST'] = dstPath
Exemplo n.º 24
0
class TransformationAgent(AgentModule):
    def initialize(self):
        self.pluginLocation = self.am_getOption(
            'PluginLocation',
            'DIRAC.TransformationSystem.Agent.TransformationPlugin')
        self.checkCatalog = self.am_getOption('CheckCatalog', 'yes')

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/ProductionManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'ProductionManager')

        self.transDB = TransformationClient('TransformationDB')
        self.rm = ReplicaManager()
        return S_OK()

    def execute(self):
        # Get the transformations to process
        res = self.getTransformations()
        if not res['OK']:
            gLogger.info("%s.execute: Failed to obtain transformations: %s" %
                         (AGENT_NAME, res['Message']))
            return S_OK()
        # Process the transformations
        for transDict in res['Value']:
            transID = long(transDict['TransformationID'])
            gLogger.info("%s.execute: Processing transformation %s." %
                         (AGENT_NAME, transID))
            startTime = time.time()
            res = self.processTransformation(transDict)
            if not res['OK']:
                gLogger.info(
                    "%s.execute: Failed to process transformation: %s" %
                    (AGENT_NAME, res['Message']))
            else:
                gLogger.info(
                    "%s.execute: Processed transformation in %.1f seconds" %
                    (AGENT_NAME, time.time() - startTime))
        return S_OK()

    def getTransformations(self):
        # Obtain the transformations to be executed
        transName = self.am_getOption('Transformation', 'All')
        if transName == 'All':
            gLogger.info(
                "%s.getTransformations: Initializing general purpose agent." %
                AGENT_NAME)
            res = self.transDB.getTransformations(
                {'Status': ['Active', 'Completing', 'Flush']},
                extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "%s.getTransformations: Failed to get transformations." %
                    AGENT_NAME, res['Message'])
                return res
            transformations = res['Value']
            gLogger.info(
                "%s.getTransformations: Obtained %d transformations to process"
                % (AGENT_NAME, len(transformations)))
        else:
            gLogger.info(
                "%s.getTransformations: Initializing for transformation %s." %
                (AGENT_NAME, transName))
            res = self.transDB.getTransformation(transName, extraParams=True)
            if not res['OK']:
                gLogger.error(
                    "%s.getTransformations: Failed to get transformation." %
                    AGENT_NAME, res['Message'])
                return res
            transformations = [res['Value']]
        return S_OK(transformations)

    def processTransformation(self, transDict):
        transID = transDict['TransformationID']
        # First get the LFNs associated to the transformation
        res = self.transDB.getTransformationFiles(condDict={
            'TransformationID': transID,
            'Status': 'Unused'
        })
        if not res['OK']:
            gLogger.error(
                "%s.processTransformation: Failed to obtain input data." %
                AGENT_NAME, res['Message'])
            return res
        transFiles = res['Value']
        lfns = res['LFNs']
        if not lfns:
            gLogger.info(
                "%s.processTransformation: No 'Unused' files found for transformation."
                % AGENT_NAME)
            if transDict['Status'] == 'Flush':
                res = self.transDB.setTransformationParameter(
                    transID, 'Status', 'Active')
                if not res['OK']:
                    gLogger.error(
                        "%s.execute: Failed to update transformation status to 'Active'."
                        % AGENT_NAME, res['Message'])
                else:
                    gLogger.info(
                        "%s.execute: Updated transformation status to 'Active'."
                        % AGENT_NAME)
            return S_OK()

        # Check the data is available with replicas
        res = self.__getDataReplicas(transID,
                                     lfns,
                                     active=(transDict['Type'].lower()
                                             not in ["replication",
                                                     "removal"]))
        if not res['OK']:
            gLogger.error(
                "%s.processTransformation: Failed to get data replicas" %
                AGENT_NAME, res['Message'])
            return res
        dataReplicas = res['Value']

        # Get the plug-in type and create the plug-in object
        plugin = 'Standard'
        if transDict.has_key('Plugin') and transDict['Plugin']:
            plugin = transDict['Plugin']
        gLogger.info(
            "%s.processTransformation: Processing transformation with '%s' plug-in."
            % (AGENT_NAME, plugin))
        res = self.__generatePluginObject(plugin)
        if not res['OK']:
            return res
        oPlugin = res['Value']

        # Get the plug-in and set the required params
        oPlugin.setParameters(transDict)
        oPlugin.setInputData(dataReplicas)
        oPlugin.setTransformationFiles(transFiles)
        res = oPlugin.generateTasks()
        if not res['OK']:
            gLogger.error(
                "%s.processTransformation: Failed to generate tasks for transformation."
                % AGENT_NAME, res['Message'])
            return res
        tasks = res['Value']
        # Create the tasks
        allCreated = True
        created = 0
        for se, lfns in tasks:
            res = self.transDB.addTaskForTransformation(transID, lfns, se)
            if not res['OK']:
                gLogger.error(
                    "%s.processTransformation: Failed to add task generated by plug-in."
                    % AGENT_NAME, res['Message'])
                allCreated = False
            else:
                created += 1
        if created:
            gLogger.info(
                "%s.processTransformation: Successfully created %d tasks for transformation."
                % (AGENT_NAME, created))

        # If this production is to Flush
        if transDict['Status'] == 'Flush' and allCreated:
            res = self.transDB.setTransformationParameter(
                transID, 'Status', 'Active')
            if not res['OK']:
                gLogger.error(
                    "%s.execute: Failed to update transformation status to 'Active'."
                    % AGENT_NAME, res['Message'])
            else:
                gLogger.info(
                    "%s.execute: Updated transformation status to 'Active'." %
                    AGENT_NAME)
        return S_OK()

    ######################################################################
    #
    # Internal methods used by the agent
    #

    def __generatePluginObject(self, plugin):
        """ This simply instantiates the TransformationPlugin class with the relevant plugin name
    """
        try:
            plugModule = __import__(self.pluginLocation, globals(), locals(),
                                    ['TransformationPlugin'])
        except Exception, x:
            gLogger.exception(
                "%s.__generatePluginObject: Failed to import 'TransformationPlugin'"
                % AGENT_NAME, '', x)
            return S_ERROR()
        try:
            evalString = "plugModule.TransformationPlugin('%s')" % plugin
            return S_OK(eval(evalString))
        except Exception, x:
            gLogger.exception(
                "%s.__generatePluginObject: Failed to create %s()." %
                (AGENT_NAME, plugin), '', x)
            return S_ERROR()
Exemplo n.º 25
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """ c'tor
    """
        super(Transformation, self).__init__()

        self.paramTypes = {
            "TransformationID": [types.IntType, types.LongType],
            "TransformationName": types.StringTypes,
            "Status": types.StringTypes,
            "Description": types.StringTypes,
            "LongDescription": types.StringTypes,
            "Type": types.StringTypes,
            "Plugin": types.StringTypes,
            "AgentType": types.StringTypes,
            "FileMask": types.StringTypes,
            "TransformationGroup": types.StringTypes,
            "GroupSize": [types.IntType, types.LongType, types.FloatType],
            "InheritedFrom": [types.IntType, types.LongType],
            "Body": types.StringTypes,
            "MaxNumberOfTasks": [types.IntType, types.LongType],
            "EventsPerTask": [types.IntType, types.LongType],
        }
        self.paramValues = {
            "TransformationID": 0,
            "TransformationName": "",
            "Status": "New",
            "Description": "",
            "LongDescription": "",
            "Type": "",
            "Plugin": "Standard",
            "AgentType": "Manual",
            "FileMask": "",
            "TransformationGroup": "General",
            "GroupSize": 1,
            "InheritedFrom": 0,
            "Body": "",
            "MaxNumberOfTasks": 0,
            "EventsPerTask": 0,
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            "Transformations/AllowedPlugins", ["Broadcast", "Standard", "BySize", "ByShare"]
        )
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues["TransformationID"] = transID
            res = self.getTransformation()
            if res["OK"]:
                self.exists = True
            elif res["Message"] == "Transformation does not exist":
                raise AttributeError, "TransformationID %d does not exist" % transID
            else:
                self.paramValues["TransformationID"] = 0
                gLogger.fatal(
                    "Failed to get transformation from database", "%s @ %s" % (transID, self.transClient.serverURL)
                )

    def setServer(self, server):
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        return self.serverURL

    def reset(self, transID=0):
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        return self.__setSE("TargetSE", seList)

    def setSourceSE(self, seList):
        return self.__setSE("SourceSE", seList)

    def __setSE(self, se, seList):
        if type(seList) in types.StringTypes:
            try:
                seList = eval(seList)
            except:
                seList = seList.replace(",", " ").split()
        res = self.__checkSEs(seList)
        if not res["OK"]:
            return res
        self.item_called = se
        return self.__setParam(seList)

    def __getattr__(self, name):
        if name.find("get") == 0:
            item = name[3:]
            self.item_called = item
            return self.__getParam
        if name.find("set") == 0:
            item = name[3:]
            self.item_called = item
            return self.__setParam
        raise AttributeError, name

    def __getParam(self):
        if self.item_called == "Available":
            return S_OK(self.paramTypes.keys())
        if self.item_called == "Parameters":
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError, "Unknown parameter for transformation: %s" % self.item_called

    def __setParam(self, value):
        change = False
        if self.item_called in self.paramTypes:
            oldValue = self.paramValues[self.item_called]
            if oldValue != value:
                if type(value) in self.paramTypes[self.item_called]:
                    change = True
                else:
                    raise TypeError, "%s %s %s expected one of %s" % (
                        self.item_called,
                        value,
                        type(value),
                        self.paramTypes[self.item_called],
                    )
        if not self.item_called in self.paramTypes.keys():
            if not self.paramValues.has_key(self.item_called):
                change = True
            else:
                oldValue = self.paramValues[self.item_called]
                if oldValue != value:
                    change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" % self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues["TransformationID"]
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(transID, self.item_called, value)
                if not res["OK"]:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res["Value"]
        for paramName, paramValue in transParams.items():
            setter = None
            setterName = "set%s" % paramName
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                setter = getattr(self, setterName)
            if not setterName:
                gLogger.error("Unable to invoke setter %s, it isn't a member function" % setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res["Value"]
        if printOutput:
            self._printFormattedDictList(
                loggingList, ["Message", "MessageDate", "AuthorDN"], "MessageDate", "MessageDate"
            )
        return S_OK(loggingList)

    def extendTransformation(self, nTasks, printOutput=False):
        return self.__executeOperation("extendTransformation", nTasks, printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        res = self.__executeOperation("cleanTransformation", printOutput=printOutput)
        if res["OK"]:
            self.paramValues["Status"] = "Cleaned"
        return res

    def deleteTransformation(self, printOutput=False):
        res = self.__executeOperation("deleteTransformation", printOutput=printOutput)
        if res["OK"]:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        return self.__executeOperation("addFilesToTransformation", lfns, printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        return self.__executeOperation("setFileStatusForTransformation", status, lfns, printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        return self.__executeOperation("getTransformationTaskStats", printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        return self.__executeOperation("getTransformationStats", printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        return self.__executeOperation("deleteTasks", taskMin, taskMax, printOutput=printOutput)

    def addTaskForTransformation(self, lfns=[], se="Unknown", printOutput=False):
        return self.__executeOperation("addTaskForTransformation", lfns, se, printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        return self.__executeOperation("setTaskStatus", taskID, status, printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        transID = self.paramValues["TransformationID"]
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        printOutput = kwds.pop("printOutput")
        fcn = None
        if hasattr(self.transClient, operation) and callable(getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR("Unable to invoke %s, it isn't a member funtion of TransformationClient")
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(
        self,
        fileStatus=[],
        lfns=[],
        outputFields=[
            "FileID",
            "LFN",
            "Status",
            "TaskID",
            "TargetSE",
            "UsedSE",
            "ErrorCount",
            "InsertedTime",
            "LastUpdate",
        ],
        orderBy="FileID",
        printOutput=False,
    ):
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if fileStatus:
            condDict["Status"] = fileStatus
        if lfns:
            condDict["LFN"] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % res["ParameterNames"].join(" "))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "FileID", orderBy)
        return res

    def getTransformationTasks(
        self,
        taskStatus=[],
        taskIDs=[],
        outputFields=[
            "TransformationID",
            "TaskID",
            "ExternalStatus",
            "ExternalID",
            "TargetSE",
            "CreationTime",
            "LastUpdateTime",
        ],
        orderBy="TaskID",
        printOutput=False,
    ):
        condDict = {"TransformationID": self.paramValues["TransformationID"]}
        if taskStatus:
            condDict["ExternalStatus"] = taskStatus
        if taskIDs:
            condDict["TaskID"] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % res["ParameterNames"].join(" "))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "TaskID", orderBy)
        return res

    #############################################################################
    def getTransformations(
        self,
        transID=[],
        transStatus=[],
        outputFields=["TransformationID", "Status", "AgentType", "TransformationName", "CreationDate"],
        orderBy="TransformationID",
        printOutput=False,
    ):
        condDict = {}
        if transID:
            condDict["TransformationID"] = transID
        if transStatus:
            condDict["Status"] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" % res["ParameterNames"].join(" "))
            elif not res["Value"]:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res["Value"], outputFields, "TransformationID", orderBy)
        return res

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        res = self._checkCreation()
        if not res["OK"]:
            return self._errorReport(res, "Failed transformation sanity check")
        if printOutput:
            gLogger.info("Will attempt to create transformation with the following parameters")
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues["TransformationName"],
            self.paramValues["Description"],
            self.paramValues["LongDescription"],
            self.paramValues["Type"],
            self.paramValues["Plugin"],
            self.paramValues["AgentType"],
            self.paramValues["FileMask"],
            transformationGroup=self.paramValues["TransformationGroup"],
            groupSize=self.paramValues["GroupSize"],
            inheritedFrom=self.paramValues["InheritedFrom"],
            body=self.paramValues["Body"],
            maxTasks=self.paramValues["MaxNumberOfTasks"],
            eventsPerTask=self.paramValues["EventsPerTask"],
            addFiles=addFiles,
        )
        if not res["OK"]:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res["Value"]
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            if not self.paramTypes.has_key(paramName):
                res = self.transClient.setTransformationParameter(transID, paramName, paramValue)
                if not res["OK"]:
                    gLogger.error("Failed to add parameter", "%s %s" % (paramName, res["Message"]))
                    gLogger.notice("To add this parameter later please execute the following.")
                    gLogger.notice("oTransformation = Transformation(%d)" % transID)
                    gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """ Few checks
    """
        if self.paramValues["TransformationID"]:
            gLogger.info("You are currently working with an active transformation definition.")
            gLogger.info("If you wish to create a new transformation reset the TransformationID.")
            gLogger.info("oTransformation.reset()")
            return S_ERROR()

        requiredParameters = ["TransformationName", "Description", "LongDescription", "Type"]
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info("%s is not defined for this transformation. This is required..." % parameter)
                self.paramValues[parameter] = raw_input("Please enter the value of " + parameter + " ")

        plugin = self.paramValues["Plugin"]
        if plugin:
            if not plugin in self.supportedPlugins:
                gLogger.info("The selected Plugin (%s) is not known to the transformation agent." % plugin)
                res = self.__promptForParameter("Plugin", choices=self.supportedPlugins, default="Standard")
                if not res["OK"]:
                    return res
                self.paramValues["Plugin"] = res["Value"]

        plugin = self.paramValues["Plugin"]

        return S_OK()

    def _checkBySizePlugin(self):
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        groupSize = self.paramValues["GroupSize"]
        if groupSize <= 0:
            gLogger.info("The GroupSize was found to be less than zero. It has been set to 1.")
            res = self.setGroupSize(1)
            if not res["OK"]:
                return res
        return S_OK()

    def _checkBroadcastPlugin(self):
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s" % (", ".join(["SourceSE", "TargetSE"]))
        )
        requiredParams = ["SourceSE", "TargetSE"]
        for requiredParam in requiredParams:
            if (not self.paramValues.has_key(requiredParam)) or (not self.paramValues[requiredParam]):
                paramValue = raw_input("Please enter " + requiredParam + " ")
                setter = None
                setterName = "set%s" % requiredParam
                if hasattr(self, setterName) and callable(getattr(self, setterName)):
                    setter = getattr(self, setterName)
                if not setter:
                    return S_ERROR("Unable to invoke %s, this function hasn't been implemented." % setterName)
                ses = paramValue.replace(",", " ").split()
                res = setter(ses)
                if not res["OK"]:
                    return res
        return S_OK()

    def __checkSEs(self, seList):
        res = gConfig.getSections("/Resources/StorageElements")
        if not res["OK"]:
            return self._errorReport(res, "Failed to get possible StorageElements")
        missing = []
        for se in seList:
            if not se in res["Value"]:
                gLogger.error("StorageElement %s is not known" % se)
                missing.append(se)
        if missing:
            return S_ERROR("%d StorageElements not known" % len(missing))
        return S_OK()

    def __promptForParameter(self, parameter, choices=[], default="", insert=True):
        res = promptUser("Please enter %s" % parameter, choices=choices, default=default)
        if not res["OK"]:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res["Value"]))
        paramValue = res["Value"]
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                return S_ERROR("Unable to invoke %s, it isn't a member function of Transformation!")
            res = setter(paramValue)
            if not res["OK"]:
                return res
        return S_OK(paramValue)
Exemplo n.º 26
0
class Transformation( API ):

  #############################################################################
  def __init__( self, transID = 0, transClient = None ):
    """ c'tor
    """
    super( Transformation, self ).__init__()

    self.paramTypes = { 'TransformationID'      : [types.IntType, types.LongType],
                        'TransformationName'    : types.StringTypes,
                        'Status'                : types.StringTypes,
                        'Description'           : types.StringTypes,
                        'LongDescription'       : types.StringTypes,
                        'Type'                  : types.StringTypes,
                        'Plugin'                : types.StringTypes,
                        'AgentType'             : types.StringTypes,
                        'FileMask'              : types.StringTypes,
                        'TransformationGroup'   : types.StringTypes,
                        'GroupSize'             : [types.IntType, types.LongType, types.FloatType],
                        'InheritedFrom'         : [types.IntType, types.LongType],
                        'Body'                  : types.StringTypes,
                        'MaxNumberOfTasks'      : [types.IntType, types.LongType],
                        'EventsPerTask'         : [types.IntType, types.LongType]}
    self.paramValues = { 'TransformationID'      : 0,
                         'TransformationName'    : '',
                         'Status'                : 'New',
                         'Description'           : '',
                         'LongDescription'       : '',
                         'Type'                  : '',
                         'Plugin'                : 'Standard',
                         'AgentType'             : 'Manual',
                         'FileMask'              : '',
                         'TransformationGroup'   : 'General',
                         'GroupSize'             : 1,
                         'InheritedFrom'         : 0,
                         'Body'                  : '',
                         'MaxNumberOfTasks'       : 0,
                         'EventsPerTask'          : 0}
    self.ops = Operations()
    self.supportedPlugins = self.ops.getValue( 'Transformations/AllowedPlugins',
                                               ['Broadcast', 'Standard', 'BySize', 'ByShare'] )
    if not transClient:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient
    self.serverURL = self.transClient.getServer()
    self.exists = False
    if transID:
      self.paramValues['TransformationID'] = transID
      res = self.getTransformation()
      if res['OK']:
        self.exists = True
      elif res['Message'] == 'Transformation does not exist':
        raise AttributeError( 'TransformationID %d does not exist' % transID )
      else:
        self.paramValues['TransformationID'] = 0
        gLogger.fatal( "Failed to get transformation from database", "%s @ %s" % ( transID,
                                                                                   self.transClient.serverURL ) )

  def setServer( self, server ):
    self.serverURL = server
    self.transClient.setServer( self.serverURL )

  def getServer( self ):
    return self.serverURL

  def reset( self, transID = 0 ):
    self.__init__( transID )
    self.transClient.setServer( self.serverURL )
    return S_OK()

  def setTargetSE( self, seList ):
    return self.__setSE( 'TargetSE', seList )

  def setSourceSE( self, seList ):
    return self.__setSE( 'SourceSE', seList )

  def setBody( self, body ):
    """ check that the body is a string, or using the proper syntax for multiple operations

    :param body: transformation body, for example

      .. code :: python

        body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                 ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
               ]

    :type body: string or list of tuples (or lists) of string and dictionaries
    :raises TypeError: If the structure is not as expected
    :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation` is used
    :returns: S_OK, S_ERROR
    """
    self.item_called = "Body"
    if isinstance( body, basestring ):
      return self.__setParam( body )
    if not isinstance( body, ( list, tuple ) ):
      raise TypeError( "Expected list or string, but %r is %s" % ( body, type( body ) ) )

    for tup in body:
      if not isinstance( tup, ( tuple, list ) ):
        raise TypeError( "Expected tuple or list, but %r is %s" % ( tup, type( tup ) ) )
      if len( tup ) != 2:
        raise TypeError( "Expected 2-tuple, but %r is length %d" % ( tup, len( tup ) ) )
      if not isinstance( tup[0], basestring ):
        raise TypeError( "Expected string, but first entry in tuple %r is %s" % ( tup, type( tup[0] ) ) )
      if not isinstance( tup[1], dict ):
        raise TypeError( "Expected dictionary, but second entry in tuple %r is %s" % ( tup, type( tup[0] ) ) )
      for par, val in tup[1].iteritems():
        if not isinstance( par, basestring ):
          raise TypeError( "Expected string, but key in dictionary %r is %s" % ( par, type( par ) ) )
        if not par in Operation.ATTRIBUTE_NAMES:
          raise ValueError( "Unknown attribute for Operation: %s" % par )
        if not isinstance( val, ( basestring, int, long, float, list, tuple, dict ) ):
          raise TypeError( "Cannot encode %r, in json" % ( val ) )
      return self.__setParam( json.dumps( body ) )

  def __setSE( self, seParam, seList ):
    if isinstance( seList, basestring ):
      try:
        seList = eval( seList )
      except:
        seList = seList.split( ',' )
    elif isinstance( seList, ( list, dict, tuple ) ):
      seList = list( seList )
    else:
      return S_ERROR( "Bad argument type" )
    res = self.__checkSEs( seList )
    if not res['OK']:
      return res
    self.item_called = seParam
    return self.__setParam( seList )

  def __getattr__( self, name ):
    if name.find( 'get' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__getParam
    if name.find( 'set' ) == 0:
      item = name[3:]
      self.item_called = item
      return self.__setParam
    raise AttributeError( name )

  def __getParam( self ):
    if self.item_called == 'Available':
      return S_OK( self.paramTypes.keys() )
    if self.item_called == 'Parameters':
      return S_OK( self.paramValues )
    if self.item_called in self.paramValues:
      return S_OK( self.paramValues[self.item_called] )
    raise AttributeError( "Unknown parameter for transformation: %s" % self.item_called )

  def __setParam( self, value ):
    change = False
    if self.item_called in self.paramTypes:
      if self.paramValues[self.item_called] != value:
        if type( value ) in self.paramTypes[self.item_called]:
          change = True
        else:
          raise TypeError( "%s %s %s expected one of %s" % ( self.item_called, value, type( value ),
                                                             self.paramTypes[self.item_called] ) )
    else:
      if self.item_called not in self.paramValues:
        change = True
      else:
        if self.paramValues[self.item_called] != value:
          change = True
    if not change:
      gLogger.verbose( "No change of parameter %s required" % self.item_called )
    else:
      gLogger.verbose( "Parameter %s to be changed" % self.item_called )
      transID = self.paramValues['TransformationID']
      if self.exists and transID:
        res = self.transClient.setTransformationParameter( transID, self.item_called, value )
        if not res['OK']:
          return res
      self.paramValues[self.item_called] = value
    return S_OK()

  def getTransformation( self, printOutput = False ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformation( transID, extraParams = True )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transParams = res['Value']
    for paramName, paramValue in transParams.items():
      setter = None
      setterName = "set%s" % paramName
      if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
        setter = getattr( self, setterName )
      if not setterName:
        gLogger.error( "Unable to invoke setter %s, it isn't a member function" % setterName )
        continue
      setter( paramValue )
    if printOutput:
      gLogger.info( "No printing available yet" )
    return S_OK( transParams )

  def getTransformationLogging( self, printOutput = False ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    res = self.transClient.getTransformationLogging( transID )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    loggingList = res['Value']
    if printOutput:
      self._printFormattedDictList( loggingList, ['Message', 'MessageDate', 'AuthorDN'], 'MessageDate', 'MessageDate' )
    return S_OK( loggingList )

  def extendTransformation( self, nTasks, printOutput = False ):
    return self.__executeOperation( 'extendTransformation', nTasks, printOutput = printOutput )

  def cleanTransformation( self, printOutput = False ):
    res = self.__executeOperation( 'cleanTransformation', printOutput = printOutput )
    if res['OK']:
      self.paramValues['Status'] = 'Cleaned'
    return res

  def deleteTransformation( self, printOutput = False ):
    res = self.__executeOperation( 'deleteTransformation', printOutput = printOutput )
    if res['OK']:
      self.reset()
    return res

  def addFilesToTransformation( self, lfns, printOutput = False ):
    return self.__executeOperation( 'addFilesToTransformation', lfns, printOutput = printOutput )

  def setFileStatusForTransformation( self, status, lfns, printOutput = False ):
    return self.__executeOperation( 'setFileStatusForTransformation', status, lfns, printOutput = printOutput )

  def getTransformationTaskStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationTaskStats', printOutput = printOutput )

  def getTransformationStats( self, printOutput = False ):
    return self.__executeOperation( 'getTransformationStats', printOutput = printOutput )

  def deleteTasks( self, taskMin, taskMax, printOutput = False ):
    return self.__executeOperation( 'deleteTasks', taskMin, taskMax, printOutput = printOutput )

  def addTaskForTransformation( self, lfns = [], se = 'Unknown', printOutput = False ):
    return self.__executeOperation( 'addTaskForTransformation', lfns, se, printOutput = printOutput )

  def setTaskStatus( self, taskID, status, printOutput = False ):
    return self.__executeOperation( 'setTaskStatus', taskID, status, printOutput = printOutput )

  def __executeOperation( self, operation, *parms, **kwds ):
    transID = self.paramValues['TransformationID']
    if not transID:
      gLogger.fatal( "No TransformationID known" )
      return S_ERROR()
    printOutput = kwds.pop( 'printOutput' )
    fcn = None
    if hasattr( self.transClient, operation ) and callable( getattr( self.transClient, operation ) ):
      fcn = getattr( self.transClient, operation )
    if not fcn:
      return S_ERROR( "Unable to invoke %s, it isn't a member funtion of TransformationClient" )
    res = fcn( transID, *parms, **kwds )
    if printOutput:
      self._prettyPrint( res )
    return res

  def getTransformationFiles( self, fileStatus = [], lfns = [], outputFields = ['FileID', 'LFN', 'Status', 'TaskID',
                                                                                'TargetSE', 'UsedSE', 'ErrorCount',
                                                                                'InsertedTime', 'LastUpdate'],
                              orderBy = 'FileID', printOutput = False ):
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if fileStatus:
      condDict['Status'] = fileStatus
    if lfns:
      condDict['LFN'] = lfns
    res = self.transClient.getTransformationFiles( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'FileID', orderBy )
    return res

  def getTransformationTasks( self, taskStatus = [], taskIDs = [], outputFields = ['TransformationID', 'TaskID',
                                                                                   'ExternalStatus', 'ExternalID',
                                                                                   'TargetSE', 'CreationTime',
                                                                                   'LastUpdateTime'],
                              orderBy = 'TaskID', printOutput = False ):
    condDict = {'TransformationID':self.paramValues['TransformationID']}
    if taskStatus:
      condDict['ExternalStatus'] = taskStatus
    if taskIDs:
      condDict['TaskID'] = taskIDs
    res = self.transClient.getTransformationTasks( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TaskID', orderBy )
    return res

  #############################################################################
  def getTransformations( self, transID = [], transStatus = [], outputFields = ['TransformationID', 'Status',
                                                                                'AgentType', 'TransformationName',
                                                                                'CreationDate'],
                          orderBy = 'TransformationID', printOutput = False ):
    condDict = {}
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getAuthorDNfromProxy( self ):
    """ gets the AuthorDN and username of the transformation from the uploaded proxy
    """
    username = ""
    author = ""
    res = getProxyInfo()
    if res['OK']:
      author = res['Value']['identity']
      username = res['Value']['username']
    else:
      gLogger.error( "Unable to get uploaded proxy Info %s " % res['Message'] )
      return S_ERROR( res['Message'] )

    res = {'username' : username, 'authorDN' : author }
    return S_OK( res )

  #############################################################################
  def getTransformationsByUser( self, authorDN = "", userName = "", transID = [], transStatus = [],
                                outputFields = ['TransformationID', 'Status',
                                                'AgentType', 'TransformationName',
                                                'CreationDate', 'AuthorDN'],
                                orderBy = 'TransformationID', printOutput = False ):
    condDict = {}
    if authorDN == "":
      res = self.getAuthorDNfromProxy()
      if not res['OK']:
        gLogger.error( res['Message'] )
        return S_ERROR( res['Message'] )
      else:
        foundUserName = res['Value']['username']
        foundAuthor = res['Value']['authorDN']
        # If the username whom created the uploaded proxy is different than the provided username report error and exit
        if not ( userName == ""  or userName == foundUserName ):
          gLogger.error( "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % ( userName, foundUserName ) )
          return S_ERROR( "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')" % ( userName, foundUserName ) )

        userName = foundUserName
        authorDN = foundAuthor
        gLogger.info( "Will list transformations created by user '%s' with status '%s'" % ( userName, ', '.join( transStatus ) ) )
    else:
      gLogger.info( "Will list transformations created by '%s' with status '%s'" % ( authorDN, ', '.join( transStatus ) ) )

    condDict['AuthorDN'] = authorDN
    if transID:
      condDict['TransformationID'] = transID
    if transStatus:
      condDict['Status'] = transStatus
    res = self.transClient.getTransformations( condDict = condDict )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res

    if printOutput:
      if not outputFields:
        gLogger.info( "Available fields are: %s" % res['ParameterNames'].join( ' ' ) )
      elif not res['Value']:
        gLogger.info( "No tasks found for selection" )
      else:
        self._printFormattedDictList( res['Value'], outputFields, 'TransformationID', orderBy )
    return res

  #############################################################################
  def getSummaryTransformations( self , transID = [] ):
    """Show the summary for a list of Transformations

       Fields starting with 'F' ('J')  refers to files (jobs).
       Proc. stand for processed.
    """
    condDict = { 'TransformationID' : transID }
    orderby = []
    start = 0
    maxitems = len( transID )
    paramShowNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed', \
                      'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting', \
                      'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled']
    # Below, the header used for each field in the printing: short to fit in one line
    paramShowNamesShort = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.', \
                           'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled']
    dictList = []

    result = self.transClient.getTransformationSummaryWeb( condDict, orderby, start, maxitems )
    if not result['OK']:
      self._prettyPrint( result )
      return result

    if result['Value']['TotalRecords'] > 0:
      try:
        paramNames = result['Value']['ParameterNames']
        for paramValues in result['Value']['Records']:
          paramShowValues = map( lambda pname: paramValues[ paramNames.index( pname ) ], paramShowNames )
          showDict = dict( zip( paramShowNamesShort, paramShowValues ) )
          dictList.append( showDict )

      except Exception as x:
        print 'Exception %s ' % str( x )

    if not len( dictList ) > 0:
      gLogger.error( 'No found transformations satisfying input condition' )
      return S_ERROR( 'No found transformations satisfying input condition' )
    else:
      print self._printFormattedDictList( dictList, paramShowNamesShort, paramShowNamesShort[0], paramShowNamesShort[0] )

    return S_OK( dictList )

  #############################################################################
  def addTransformation( self, addFiles = True, printOutput = False ):
    res = self._checkCreation()
    if not res['OK']:
      return self._errorReport( res, 'Failed transformation sanity check' )
    if printOutput:
      gLogger.info( "Will attempt to create transformation with the following parameters" )
      self._prettyPrint( self.paramValues )

    res = self.transClient.addTransformation( self.paramValues['TransformationName'],
                                              self.paramValues['Description'],
                                              self.paramValues['LongDescription'],
                                              self.paramValues['Type'],
                                              self.paramValues['Plugin'],
                                              self.paramValues['AgentType'],
                                              self.paramValues['FileMask'],
                                              transformationGroup = self.paramValues['TransformationGroup'],
                                              groupSize = self.paramValues['GroupSize'],
                                              inheritedFrom = self.paramValues['InheritedFrom'],
                                              body = self.paramValues['Body'],
                                              maxTasks = self.paramValues['MaxNumberOfTasks'],
                                              eventsPerTask = self.paramValues['EventsPerTask'],
                                              addFiles = addFiles )
    if not res['OK']:
      if printOutput:
        self._prettyPrint( res )
      return res
    transID = res['Value']
    self.exists = True
    self.setTransformationID( transID )
    gLogger.notice( "Created transformation %d" % transID )
    for paramName, paramValue in self.paramValues.items():
      if paramName not in self.paramTypes:
        res = self.transClient.setTransformationParameter( transID, paramName, paramValue )
        if not res['OK']:
          gLogger.error( "Failed to add parameter", "%s %s" % ( paramName, res['Message'] ) )
          gLogger.notice( "To add this parameter later please execute the following." )
          gLogger.notice( "oTransformation = Transformation(%d)" % transID )
          gLogger.notice( "oTransformation.set%s(...)" % paramName )
    return S_OK( transID )

  def _checkCreation( self ):
    """ Few checks
    """
    if self.paramValues['TransformationID']:
      gLogger.info( "You are currently working with an active transformation definition." )
      gLogger.info( "If you wish to create a new transformation reset the TransformationID." )
      gLogger.info( "oTransformation.reset()" )
      return S_ERROR()

    requiredParameters = ['TransformationName', 'Description' , 'LongDescription', 'Type']
    for parameter in requiredParameters:
      if not self.paramValues[parameter]:
        gLogger.info( "%s is not defined for this transformation. This is required..." % parameter )
        self.paramValues[parameter] = raw_input( "Please enter the value of " + parameter + " " )

    plugin = self.paramValues['Plugin']
    if plugin:
      if not plugin in self.supportedPlugins:
        gLogger.info( "The selected Plugin (%s) is not known to the transformation agent." % plugin )
        res = self.__promptForParameter( 'Plugin', choices = self.supportedPlugins, default = 'Standard' )
        if not res['OK']:
          return res
        self.paramValues['Plugin'] = res['Value']

    plugin = self.paramValues['Plugin']

    return S_OK()

  def _checkBySizePlugin( self ):
    return self._checkStandardPlugin()

  def _checkBySharePlugin( self ):
    return self._checkStandardPlugin()

  def _checkStandardPlugin( self ):
    groupSize = self.paramValues['GroupSize']
    if groupSize <= 0:
      gLogger.info( "The GroupSize was found to be less than zero. It has been set to 1." )
      res = self.setGroupSize( 1 )
      if not res['OK']:
        return res
    return S_OK()

  def _checkBroadcastPlugin( self ):
    gLogger.info( "The Broadcast plugin requires the following parameters be set: %s" % ( ', '.join( ['SourceSE',
                                                                                                      'TargetSE'] ) ) )
    requiredParams = ['SourceSE', 'TargetSE']
    for requiredParam in requiredParams:
      if not self.paramValues.get( requiredParam ):
        paramValue = raw_input( "Please enter " + requiredParam + " " )
        setter = None
        setterName = "set%s" % requiredParam
        if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
          setter = getattr( self, setterName )
        if not setter:
          return S_ERROR( "Unable to invoke %s, this function hasn't been implemented." % setterName )
        ses = paramValue.replace( ',', ' ' ).split()
        res = setter( ses )
        if not res['OK']:
          return res
    return S_OK()

  def __checkSEs( self, seList ):
    res = gConfig.getSections( '/Resources/StorageElements' )
    if not res['OK']:
      return self._errorReport( res, 'Failed to get possible StorageElements' )
    missing = set( seList ) - set( res['Value'] )
    if missing:
      for se in missing:
        gLogger.error( "StorageElement %s is not known" % se )
      return S_ERROR( "%d StorageElements not known" % len( missing ) )
    return S_OK()

  def __promptForParameter( self, parameter, choices = [], default = '', insert = True ):
    res = promptUser( "Please enter %s" % parameter, choices = choices, default = default )
    if not res['OK']:
      return self._errorReport( res )
    gLogger.notice( "%s will be set to '%s'" % ( parameter, res['Value'] ) )
    paramValue = res['Value']
    if insert:
      setter = None
      setterName = "set%s" % parameter
      if hasattr( self, setterName ) and callable( getattr( self, setterName ) ):
        setter = getattr( self, setterName )
      if not setter:
        return S_ERROR( "Unable to invoke %s, it isn't a member function of Transformation!" )
      res = setter( paramValue )
      if not res['OK']:
        return res
    return S_OK( paramValue )
Exemplo n.º 27
0
class TransformationCleaningAgent(AgentModule):
    """
    .. class:: TransformationCleaningAgent

    :param ~DIRAC.DataManagementSystem.Client.DataManager.DataManager dm: DataManager instance
    :param ~TransformationClient.TransformationClient transClient: TransformationClient instance
    :param ~FileCatalogClient.FileCatalogClient metadataClient: FileCatalogClient instance

    """
    def __init__(self, *args, **kwargs):
        """c'tor"""
        AgentModule.__init__(self, *args, **kwargs)

        self.shifterProxy = None

        # # transformation client
        self.transClient = None
        # # wms client
        self.wmsClient = None
        # # request client
        self.reqClient = None
        # # file catalog client
        self.metadataClient = None

        # # transformations types
        self.transformationTypes = None
        # # directory locations
        self.directoryLocations = ["TransformationDB", "MetadataCatalog"]
        # # transformation metadata
        self.transfidmeta = "TransformationID"
        # # archive periof in days
        self.archiveAfter = 7
        # # transformation log SEs
        self.logSE = "LogSE"
        # # enable/disable execution
        self.enableFlag = "True"

        self.dataProcTTypes = ["MCSimulation", "Merge"]
        self.dataManipTTypes = ["Replication", "Removal"]

    def initialize(self):
        """agent initialisation

        reading and setting config opts

        :param self: self reference
        """
        # # shifter proxy
        # See cleanContent method: this proxy will be used ALSO when the file catalog used
        # is the DIRAC File Catalog (DFC).
        # This is possible because of unset of the "UseServerCertificate" option
        self.shifterProxy = self.am_getOption("shifterProxy",
                                              self.shifterProxy)

        # # transformations types
        self.dataProcTTypes = Operations().getValue(
            "Transformations/DataProcessing", self.dataProcTTypes)
        self.dataManipTTypes = Operations().getValue(
            "Transformations/DataManipulation", self.dataManipTTypes)
        agentTSTypes = self.am_getOption("TransformationTypes", [])
        if agentTSTypes:
            self.transformationTypes = sorted(agentTSTypes)
        else:
            self.transformationTypes = sorted(self.dataProcTTypes +
                                              self.dataManipTTypes)
        self.log.info("Will consider the following transformation types: %s" %
                      str(self.transformationTypes))
        # # directory locations
        self.directoryLocations = sorted(
            self.am_getOption("DirectoryLocations", self.directoryLocations))
        self.log.info(
            "Will search for directories in the following locations: %s" %
            str(self.directoryLocations))
        # # transformation metadata
        self.transfidmeta = self.am_getOption("TransfIDMeta",
                                              self.transfidmeta)
        self.log.info("Will use %s as metadata tag name for TransformationID" %
                      self.transfidmeta)
        # # archive periof in days
        self.archiveAfter = self.am_getOption("ArchiveAfter",
                                              self.archiveAfter)  # days
        self.log.info("Will archive Completed transformations after %d days" %
                      self.archiveAfter)
        # # transformation log SEs
        self.logSE = Operations().getValue("/LogStorage/LogSE", self.logSE)
        self.log.info("Will remove logs found on storage element: %s" %
                      self.logSE)

        # # transformation client
        self.transClient = TransformationClient()
        # # wms client
        self.wmsClient = WMSClient()
        # # request client
        self.reqClient = ReqClient()
        # # file catalog client
        self.metadataClient = FileCatalogClient()
        # # job monitoring client
        self.jobMonitoringClient = JobMonitoringClient()

        return S_OK()

    #############################################################################
    def execute(self):
        """execution in one agent's cycle

        :param self: self reference
        """

        self.enableFlag = self.am_getOption("EnableFlag", self.enableFlag)
        if self.enableFlag != "True":
            self.log.info(
                "TransformationCleaningAgent is disabled by configuration option EnableFlag"
            )
            return S_OK("Disabled via CS flag")

        # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
        res = self.transClient.getTransformations({
            "Status":
            "Cleaning",
            "Type":
            self.transformationTypes
        })
        if res["OK"]:
            for transDict in res["Value"]:
                if self.shifterProxy:
                    self._executeClean(transDict)
                else:
                    self.log.info(
                        "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeClean)(
                        transDict,
                        proxyUserDN=transDict["AuthorDN"],
                        proxyUserGroup=transDict["AuthorGroup"])
        else:
            self.log.error("Failed to get transformations", res["Message"])

        # Obtain the transformations in RemovingFiles status and removes the output files
        res = self.transClient.getTransformations({
            "Status":
            "RemovingFiles",
            "Type":
            self.transformationTypes
        })
        if res["OK"]:
            for transDict in res["Value"]:
                if self.shifterProxy:
                    self._executeRemoval(transDict)
                else:
                    self.log.info(
                        "Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeRemoval)(
                        transDict,
                        proxyUserDN=transDict["AuthorDN"],
                        proxyUserGroup=transDict["AuthorGroup"])
        else:
            self.log.error("Could not get the transformations", res["Message"])

        # Obtain the transformations in Completed status and archive if inactive for X days
        olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
        res = self.transClient.getTransformations(
            {
                "Status": "Completed",
                "Type": self.transformationTypes
            },
            older=olderThanTime,
            timeStamp="LastUpdate")
        if res["OK"]:
            for transDict in res["Value"]:
                if self.shifterProxy:
                    self._executeArchive(transDict)
                else:
                    self.log.info(
                        "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeArchive)(
                        transDict,
                        proxyUserDN=transDict["AuthorDN"],
                        proxyUserGroup=transDict["AuthorGroup"])
        else:
            self.log.error("Could not get the transformations", res["Message"])
        return S_OK()

    def finalize(self):
        """Only at finalization: will clean ancient transformations (remnants)

            1) get the transformation IDs of jobs that are older than 1 year
            2) find the status of those transformations. Those "Cleaned" and "Archived" will be
               cleaned and archived (again)

        Why doing this here? Basically, it's a race:

        1) the production manager submits a transformation
        2) the TransformationAgent, and a bit later the WorkflowTaskAgent, put such transformation in their internal queue,
           so eventually during their (long-ish) cycle they'll work on it.
        3) 1 minute after creating the transformation, the production manager cleans it (by hand, for whatever reason).
           So, the status is changed to "Cleaning"
        4) the TransformationCleaningAgent cleans what has been created (maybe, nothing),
           then sets the transformation status to "Cleaned" or "Archived"
        5) a bit later the TransformationAgent, and later the WorkflowTaskAgent, kick in,
           creating tasks and jobs for a production that's effectively cleaned (but these 2 agents don't know yet).

        Of course, one could make one final check in TransformationAgent or WorkflowTaskAgent,
        but these 2 agents are already doing a lot of stuff, and are pretty heavy.
        So, we should just clean from time to time.
        What I added here is done only when the agent finalize, and it's quite light-ish operation anyway.
        """
        res = self.jobMonitoringClient.getJobGroups(
            None,
            datetime.utcnow() - timedelta(days=365))
        if not res["OK"]:
            self.log.error("Failed to get job groups", res["Message"])
            return res
        transformationIDs = res["Value"]
        if transformationIDs:
            res = self.transClient.getTransformations(
                {"TransformationID": transformationIDs})
            if not res["OK"]:
                self.log.error("Failed to get transformations", res["Message"])
                return res
            transformations = res["Value"]
            toClean = []
            toArchive = []
            for transDict in transformations:
                if transDict["Status"] == "Cleaned":
                    toClean.append(transDict)
                if transDict["Status"] == "Archived":
                    toArchive.append(transDict)

            for transDict in toClean:
                if self.shifterProxy:
                    self._executeClean(transDict)
                else:
                    self.log.info(
                        "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeClean)(
                        transDict,
                        proxyUserDN=transDict["AuthorDN"],
                        proxyUserGroup=transDict["AuthorGroup"])

            for transDict in toArchive:
                if self.shifterProxy:
                    self._executeArchive(transDict)
                else:
                    self.log.info(
                        "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeArchive)(
                        transDict,
                        proxyUserDN=transDict["AuthorDN"],
                        proxyUserGroup=transDict["AuthorGroup"])

            # Remove JobIDs that were unknown to the TransformationSystem
            jobGroupsToCheck = [
                str(transDict["TransformationID"]).zfill(8)
                for transDict in toClean + toArchive
            ]
            res = self.jobMonitoringClient.getJobs(
                {"JobGroup": jobGroupsToCheck})
            if not res["OK"]:
                return res
            jobIDsToRemove = [int(jobID) for jobID in res["Value"]]
            res = self.__removeWMSTasks(jobIDsToRemove)
            if not res["OK"]:
                return res

        return S_OK()

    def _executeClean(self, transDict):
        """Clean transformation."""
        # if transformation is of type `Replication` or `Removal`, there is nothing to clean.
        # We just archive
        if transDict["Type"] in self.dataManipTTypes:
            res = self.archiveTransformation(transDict["TransformationID"])
            if not res["OK"]:
                self.log.error(
                    "Problems archiving transformation",
                    "%s: %s" % (transDict["TransformationID"], res["Message"]))
        else:
            res = self.cleanTransformation(transDict["TransformationID"])
            if not res["OK"]:
                self.log.error(
                    "Problems cleaning transformation",
                    "%s: %s" % (transDict["TransformationID"], res["Message"]))

    def _executeRemoval(self, transDict):
        """Remove files from given transformation."""
        res = self.removeTransformationOutput(transDict["TransformationID"])
        if not res["OK"]:
            self.log.error(
                "Problems removing transformation",
                "%s: %s" % (transDict["TransformationID"], res["Message"]))

    def _executeArchive(self, transDict):
        """Archive the given transformation."""
        res = self.archiveTransformation(transDict["TransformationID"])
        if not res["OK"]:
            self.log.error(
                "Problems archiving transformation",
                "%s: %s" % (transDict["TransformationID"], res["Message"]))

        return S_OK()

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """get the directories for the supplied transformation from the transformation system.
            These directories are used by removeTransformationOutput and cleanTransformation for removing output.

        :param self: self reference
        :param int transID: transformation ID
        """
        self.log.verbose(
            "Cleaning Transformation directories of transformation %d" %
            transID)
        directories = []
        if "TransformationDB" in self.directoryLocations:
            res = self.transClient.getTransformationParameters(
                transID, ["OutputDirectories"])
            if not res["OK"]:
                self.log.error("Failed to obtain transformation directories",
                               res["Message"])
                return res
            transDirectories = []
            if res["Value"]:
                if not isinstance(res["Value"], list):
                    try:
                        transDirectories = ast.literal_eval(res["Value"])
                    except Exception:
                        # It can happen if the res['Value'] is '/a/b/c' instead of '["/a/b/c"]'
                        transDirectories.append(res["Value"])
                else:
                    transDirectories = res["Value"]
            directories = self._addDirs(transID, transDirectories, directories)

        if "MetadataCatalog" in self.directoryLocations:
            res = self.metadataClient.findDirectoriesByMetadata(
                {self.transfidmeta: transID})
            if not res["OK"]:
                self.log.error("Failed to obtain metadata catalog directories",
                               res["Message"])
                return res
            transDirectories = res["Value"]
            directories = self._addDirs(transID, transDirectories, directories)

        if not directories:
            self.log.info("No output directories found")
        directories = sorted(directories)
        return S_OK(directories)

    @classmethod
    def _addDirs(cls, transID, newDirs, existingDirs):
        """append unique :newDirs: list to :existingDirs: list

        :param self: self reference
        :param int transID: transformationID
        :param list newDirs: src list of paths
        :param list existingDirs: dest list of paths
        """
        for folder in newDirs:
            transStr = str(transID).zfill(8)
            if re.search(transStr, str(folder)):
                if folder not in existingDirs:
                    existingDirs.append(os.path.normpath(folder))
        return existingDirs

    #############################################################################
    #
    # These are the methods for performing the cleaning of catalogs and storage
    #

    def cleanContent(self, directory):
        """wipe out everything from catalog under folder :directory:

        :param self: self reference
        :params str directory: folder name
        """
        self.log.verbose("Cleaning Catalog contents")
        res = self.__getCatalogDirectoryContents([directory])
        if not res["OK"]:
            return res
        filesFound = res["Value"]
        if not filesFound:
            self.log.info(
                "No files are registered in the catalog directory %s" %
                directory)
            return S_OK()
        self.log.info(
            "Attempting to remove possible remnants from the catalog and storage",
            "(n=%d)" % len(filesFound))

        # Executing with shifter proxy
        gConfigurationData.setOptionInCFG(
            "/DIRAC/Security/UseServerCertificate", "false")
        res = DataManager().removeFile(filesFound, force=True)
        gConfigurationData.setOptionInCFG(
            "/DIRAC/Security/UseServerCertificate", "true")

        if not res["OK"]:
            return res
        realFailure = False
        for lfn, reason in res["Value"]["Failed"].items():
            if "File does not exist" in str(reason):
                self.log.warn("File %s not found in some catalog: " % (lfn))
            else:
                self.log.error("Failed to remove file found in the catalog",
                               "%s %s" % (lfn, reason))
                realFailure = True
        if realFailure:
            return S_ERROR("Failed to remove all files found in the catalog")
        return S_OK()

    def __getCatalogDirectoryContents(self, directories):
        """get catalog contents under paths :directories:

        :param self: self reference
        :param list directories: list of paths in catalog
        """
        self.log.info("Obtaining the catalog contents for %d directories:" %
                      len(directories))
        for directory in directories:
            self.log.info(directory)
        activeDirs = directories
        allFiles = {}
        fc = FileCatalog()
        while activeDirs:
            currentDir = activeDirs[0]
            res = returnSingleResult(fc.listDirectory(currentDir))
            activeDirs.remove(currentDir)
            if not res["OK"] and "Directory does not exist" in res[
                    "Message"]:  # FIXME: DFC should return errno
                self.log.info("The supplied directory %s does not exist" %
                              currentDir)
            elif not res["OK"]:
                if "No such file or directory" in res["Message"]:
                    self.log.info("%s: %s" % (currentDir, res["Message"]))
                else:
                    self.log.error(
                        "Failed to get directory %s content" % currentDir,
                        res["Message"])
            else:
                dirContents = res["Value"]
                activeDirs.extend(dirContents["SubDirs"])
                allFiles.update(dirContents["Files"])
        self.log.info("", "Found %d files" % len(allFiles))
        return S_OK(list(allFiles))

    def cleanTransformationLogFiles(self, directory):
        """clean up transformation logs from directory :directory:

        :param self: self reference
        :param str directory: folder name
        """
        self.log.verbose("Removing log files found in the directory",
                         directory)
        res = returnSingleResult(
            StorageElement(self.logSE).removeDirectory(directory,
                                                       recursive=True))
        if not res["OK"]:
            if cmpError(res, errno.ENOENT):  # No such file or directory
                self.log.warn("Transformation log directory does not exist",
                              directory)
                return S_OK()
            self.log.error("Failed to remove log files", res["Message"])
            return res
        self.log.info("Successfully removed transformation log directory")
        return S_OK()

    #############################################################################
    #
    # These are the functional methods for archiving and cleaning transformations
    #

    def removeTransformationOutput(self, transID):
        """This just removes any mention of the output data from the catalog and storage"""
        self.log.info("Removing output data for transformation %s" % transID)
        res = self.getTransformationDirectories(transID)
        if not res["OK"]:
            self.log.error("Problem obtaining directories for transformation",
                           "%s with result '%s'" % (transID, res))
            return S_OK()
        directories = res["Value"]
        for directory in directories:
            if not re.search("/LOG/", directory):
                res = self.cleanContent(directory)
                if not res["OK"]:
                    return res

        self.log.info("Removed %d directories from the catalog \
      and its files from the storage for transformation %s" %
                      (len(directories), transID))
        # Clean ALL the possible remnants found in the metadata catalog
        res = self.cleanMetadataCatalogFiles(transID)
        if not res["OK"]:
            return res
        self.log.info("Successfully removed output of transformation", transID)
        # Change the status of the transformation to RemovedFiles
        res = self.transClient.setTransformationParameter(
            transID, "Status", "RemovedFiles")
        if not res["OK"]:
            self.log.error(
                "Failed to update status of transformation %s to RemovedFiles"
                % (transID), res["Message"])
            return res
        self.log.info("Updated status of transformation %s to RemovedFiles" %
                      (transID))
        return S_OK()

    def archiveTransformation(self, transID):
        """This just removes job from the jobDB and the transformation DB

        :param self: self reference
        :param int transID: transformation ID
        """
        self.log.info("Archiving transformation %s" % transID)
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res["OK"]:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res["OK"]:
            return res
        self.log.info("Successfully archived transformation %d" % transID)
        # Change the status of the transformation to archived
        res = self.transClient.setTransformationParameter(
            transID, "Status", "Archived")
        if not res["OK"]:
            self.log.error(
                "Failed to update status of transformation %s to Archived" %
                (transID), res["Message"])
            return res
        self.log.info("Updated status of transformation %s to Archived" %
                      (transID))
        return S_OK()

    def cleanTransformation(self, transID):
        """This removes what was produced by the supplied transformation,
        leaving only some info and log in the transformation DB.
        """
        self.log.info("Cleaning transformation", transID)
        res = self.getTransformationDirectories(transID)
        if not res["OK"]:
            self.log.error("Problem obtaining directories for transformation",
                           "%s with result '%s'" % (transID, res["Message"]))
            return S_OK()
        directories = res["Value"]
        # Clean the jobs in the WMS and any failover requests found
        res = self.cleanTransformationTasks(transID)
        if not res["OK"]:
            return res
        # Clean the log files for the jobs
        for directory in directories:
            if re.search("/LOG/", directory):
                res = self.cleanTransformationLogFiles(directory)
                if not res["OK"]:
                    return res
            res = self.cleanContent(directory)
            if not res["OK"]:
                return res

        # Clean ALL the possible remnants found
        res = self.cleanMetadataCatalogFiles(transID)
        if not res["OK"]:
            return res
        # Clean the transformation DB of the files and job information
        res = self.transClient.cleanTransformation(transID)
        if not res["OK"]:
            return res
        self.log.info("Successfully cleaned transformation", transID)
        res = self.transClient.setTransformationParameter(
            transID, "Status", "Cleaned")
        if not res["OK"]:
            self.log.error(
                "Failed to update status of transformation %s to Cleaned" %
                (transID), res["Message"])
            return res
        self.log.info("Updated status of transformation",
                      "%s to Cleaned" % (transID))
        return S_OK()

    def cleanMetadataCatalogFiles(self, transID):
        """wipe out files from catalog"""
        res = self.metadataClient.findFilesByMetadata(
            {self.transfidmeta: transID})
        if not res["OK"]:
            return res
        fileToRemove = res["Value"]
        if not fileToRemove:
            self.log.info("No files found for transID", transID)
            return S_OK()

        # Executing with shifter proxy
        gConfigurationData.setOptionInCFG(
            "/DIRAC/Security/UseServerCertificate", "false")
        res = DataManager().removeFile(fileToRemove, force=True)
        gConfigurationData.setOptionInCFG(
            "/DIRAC/Security/UseServerCertificate", "true")

        if not res["OK"]:
            return res
        for lfn, reason in res["Value"]["Failed"].items():
            self.log.error("Failed to remove file found in metadata catalog",
                           "%s %s" % (lfn, reason))
        if res["Value"]["Failed"]:
            return S_ERROR(
                "Failed to remove all files found in the metadata catalog")
        self.log.info("Successfully removed all files found in the DFC")
        return S_OK()

    #############################################################################
    #
    # These are the methods for removing the jobs from the WMS and transformation DB
    #

    def cleanTransformationTasks(self, transID):
        """clean tasks from WMS, or from the RMS if it is a DataManipulation transformation"""
        self.log.verbose("Cleaning Transformation tasks of transformation",
                         transID)
        res = self.__getTransformationExternalIDs(transID)
        if not res["OK"]:
            return res
        externalIDs = res["Value"]
        if externalIDs:
            res = self.transClient.getTransformationParameters(
                transID, ["Type"])
            if not res["OK"]:
                self.log.error("Failed to determine transformation type")
                return res
            transType = res["Value"]
            if transType in self.dataProcTTypes:
                res = self.__removeWMSTasks(externalIDs)
            else:
                res = self.__removeRequests(externalIDs)
            if not res["OK"]:
                return res
        return S_OK()

    def __getTransformationExternalIDs(self, transID):
        """collect all ExternalIDs for transformation :transID:

        :param self: self reference
        :param int transID: transforamtion ID
        """
        res = self.transClient.getTransformationTasks(
            condDict={"TransformationID": transID})
        if not res["OK"]:
            self.log.error(
                "Failed to get externalIDs for transformation %d" % transID,
                res["Message"])
            return res
        externalIDs = [taskDict["ExternalID"] for taskDict in res["Value"]]
        self.log.info("Found %d tasks for transformation" % len(externalIDs))
        return S_OK(externalIDs)

    def __removeRequests(self, requestIDs):
        """This will remove requests from the RMS system -"""
        rIDs = [int(int(j)) for j in requestIDs if int(j)]
        for reqID in rIDs:
            self.reqClient.cancelRequest(reqID)

        return S_OK()

    def __removeWMSTasks(self, transJobIDs):
        """delete jobs (mark their status as "JobStatus.DELETED") and their requests from the system

        :param self: self reference
        :param list trasnJobIDs: job IDs
        """
        # Prevent 0 job IDs
        jobIDs = [int(j) for j in transJobIDs if int(j)]
        allRemove = True
        for jobList in breakListIntoChunks(jobIDs, 500):

            res = self.wmsClient.killJob(jobList)
            if res["OK"]:
                self.log.info("Successfully killed %d jobs from WMS" %
                              len(jobList))
            elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs"
                                               not in res) and ("FailedJobIDs"
                                                                not in res):
                self.log.info("Found jobs which did not exist in the WMS",
                              "(n=%d)" % len(res["InvalidJobIDs"]))
            elif "NonauthorizedJobIDs" in res:
                self.log.error("Failed to kill jobs because not authorized",
                               "(n=%d)" % len(res["NonauthorizedJobIDs"]))
                allRemove = False
            elif "FailedJobIDs" in res:
                self.log.error("Failed to kill jobs",
                               "(n=%d)" % len(res["FailedJobIDs"]))
                allRemove = False

            res = self.wmsClient.deleteJob(jobList)
            if res["OK"]:
                self.log.info("Successfully deleted jobs from WMS",
                              "(n=%d)" % len(jobList))
            elif ("InvalidJobIDs" in res) and ("NonauthorizedJobIDs"
                                               not in res) and ("FailedJobIDs"
                                                                not in res):
                self.log.info("Found jobs which did not exist in the WMS",
                              "(n=%d)" % len(res["InvalidJobIDs"]))
            elif "NonauthorizedJobIDs" in res:
                self.log.error("Failed to delete jobs because not authorized",
                               "(n=%d)" % len(res["NonauthorizedJobIDs"]))
                allRemove = False
            elif "FailedJobIDs" in res:
                self.log.error("Failed to delete jobs",
                               "(n=%d)" % len(res["FailedJobIDs"]))
                allRemove = False

        if not allRemove:
            return S_ERROR("Failed to delete all remnants from WMS")
        self.log.info("Successfully deleted all tasks from the WMS")

        if not jobIDs:
            self.log.info(
                "JobIDs not present, unable to delete associated requests.")
            return S_OK()

        failed = 0
        failoverRequests = {}
        res = self.reqClient.getRequestIDsForJobs(jobIDs)
        if not res["OK"]:
            self.log.error("Failed to get requestID for jobs.", res["Message"])
            return res
        failoverRequests.update(res["Value"]["Successful"])
        if not failoverRequests:
            return S_OK()
        for jobID, requestID in res["Value"]["Successful"].items():
            # Put this check just in case, tasks must have associated jobs
            if jobID == 0 or jobID == "0":
                continue
            res = self.reqClient.cancelRequest(requestID)
            if not res["OK"]:
                self.log.error("Failed to remove request from RequestDB",
                               res["Message"])
                failed += 1
            else:
                self.log.verbose("Removed request %s associated to job %d." %
                                 (requestID, jobID))

        if failed:
            self.log.info("Successfully removed requests",
                          "(n=%d)" % (len(failoverRequests) - failed))
            self.log.info("Failed to remove requests", "(n=%d)" % failed)
            return S_ERROR("Failed to remove all the request from RequestDB")
        self.log.info(
            "Successfully removed all the associated failover requests")
        return S_OK()
Exemplo n.º 28
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """ c'tor
    """
        super(Transformation, self).__init__()

        self.paramTypes = {
            'TransformationID': [types.IntType, types.LongType],
            'TransformationName': types.StringTypes,
            'Status': types.StringTypes,
            'Description': types.StringTypes,
            'LongDescription': types.StringTypes,
            'Type': types.StringTypes,
            'Plugin': types.StringTypes,
            'AgentType': types.StringTypes,
            'FileMask': types.StringTypes,
            'TransformationGroup': types.StringTypes,
            'GroupSize': [types.IntType, types.LongType, types.FloatType],
            'InheritedFrom': [types.IntType, types.LongType],
            'Body': types.StringTypes,
            'MaxNumberOfTasks': [types.IntType, types.LongType],
            'EventsPerTask': [types.IntType, types.LongType]
        }
        self.paramValues = {
            'TransformationID': 0,
            'TransformationName': '',
            'Status': 'New',
            'Description': '',
            'LongDescription': '',
            'Type': '',
            'Plugin': 'Standard',
            'AgentType': 'Manual',
            'FileMask': '',
            'TransformationGroup': 'General',
            'GroupSize': 1,
            'InheritedFrom': 0,
            'Body': '',
            'MaxNumberOfTasks': 0,
            'EventsPerTask': 0
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            'Transformations/AllowedPlugins',
            ['Broadcast', 'Standard', 'BySize', 'ByShare'])
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues['TransformationID'] = transID
            res = self.getTransformation()
            if res['OK']:
                self.exists = True
            elif res['Message'] == 'Transformation does not exist':
                raise AttributeError, 'TransformationID %d does not exist' % transID
            else:
                self.paramValues['TransformationID'] = 0
                gLogger.fatal(
                    "Failed to get transformation from database",
                    "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        return self.serverURL

    def reset(self, transID=0):
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        return self.__setSE('TargetSE', seList)

    def setSourceSE(self, seList):
        return self.__setSE('SourceSE', seList)

    def __setSE(self, se, seList):
        if type(seList) in types.StringTypes:
            try:
                seList = eval(seList)
            except:
                seList = seList.replace(',', ' ').split()
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        self.item_called = se
        return self.__setParam(seList)

    def __getattr__(self, name):
        if name.find('get') == 0:
            item = name[3:]
            self.item_called = item
            return self.__getParam
        if name.find('set') == 0:
            item = name[3:]
            self.item_called = item
            return self.__setParam
        raise AttributeError, name

    def __getParam(self):
        if self.item_called == 'Available':
            return S_OK(self.paramTypes.keys())
        if self.item_called == 'Parameters':
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError, "Unknown parameter for transformation: %s" % self.item_called

    def __setParam(self, value):
        change = False
        if self.item_called in self.paramTypes:
            oldValue = self.paramValues[self.item_called]
            if oldValue != value:
                if type(value) in self.paramTypes[self.item_called]:
                    change = True
                else:
                    raise TypeError, "%s %s %s expected one of %s" % (
                        self.item_called, value, type(value),
                        self.paramTypes[self.item_called])
        if not self.item_called in self.paramTypes.keys():
            if not self.paramValues.has_key(self.item_called):
                change = True
            else:
                oldValue = self.paramValues[self.item_called]
                if oldValue != value:
                    change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" %
                            self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues['TransformationID']
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(
                    transID, self.item_called, value)
                if not res['OK']:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setter = None
            setterName = "set%s" % paramName
            if hasattr(self, setterName) and callable(getattr(
                    self, setterName)):
                setter = getattr(self, setterName)
            if not setterName:
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res['Value']
        if printOutput:
            self._printFormattedDictList(
                loggingList, ['Message', 'MessageDate', 'AuthorDN'],
                'MessageDate', 'MessageDate')
        return S_OK(loggingList)

    def extendTransformation(self, nTasks, printOutput=False):
        return self.__executeOperation('extendTransformation',
                                       nTasks,
                                       printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        res = self.__executeOperation('cleanTransformation',
                                      printOutput=printOutput)
        if res['OK']:
            self.paramValues['Status'] = 'Cleaned'
        return res

    def deleteTransformation(self, printOutput=False):
        res = self.__executeOperation('deleteTransformation',
                                      printOutput=printOutput)
        if res['OK']:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        return self.__executeOperation('addFilesToTransformation',
                                       lfns,
                                       printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        return self.__executeOperation('setFileStatusForTransformation',
                                       status,
                                       lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        return self.__executeOperation('getTransformationTaskStats',
                                       printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        return self.__executeOperation('getTransformationStats',
                                       printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        return self.__executeOperation('deleteTasks',
                                       taskMin,
                                       taskMax,
                                       printOutput=printOutput)

    def addTaskForTransformation(self,
                                 lfns=[],
                                 se='Unknown',
                                 printOutput=False):
        return self.__executeOperation('addTaskForTransformation',
                                       lfns,
                                       se,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        return self.__executeOperation('setTaskStatus',
                                       taskID,
                                       status,
                                       printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        printOutput = kwds.pop('printOutput')
        fcn = None
        if hasattr(self.transClient, operation) and callable(
                getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR(
                "Unable to invoke %s, it isn't a member funtion of TransformationClient"
            )
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self,
                               fileStatus=[],
                               lfns=[],
                               outputFields=[
                                   'FileID', 'LFN', 'Status', 'TaskID',
                                   'TargetSE', 'UsedSE', 'ErrorCount',
                                   'InsertedTime', 'LastUpdate'
                               ],
                               orderBy='FileID',
                               printOutput=False):
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'FileID', orderBy)
        return res

    def getTransformationTasks(self,
                               taskStatus=[],
                               taskIDs=[],
                               outputFields=[
                                   'TransformationID', 'TaskID',
                                   'ExternalStatus', 'ExternalID', 'TargetSE',
                                   'CreationTime', 'LastUpdateTime'
                               ],
                               orderBy='TaskID',
                               printOutput=False):
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self,
                           transID=[],
                           transStatus=[],
                           outputFields=[
                               'TransformationID', 'Status', 'AgentType',
                               'TransformationName', 'CreationDate'
                           ],
                           orderBy='TransformationID',
                           printOutput=False):
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        res = self._checkCreation()
        if not res['OK']:
            return self._errorReport(res, 'Failed transformation sanity check')
        if printOutput:
            gLogger.info(
                "Will attempt to create transformation with the following parameters"
            )
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues['TransformationName'],
            self.paramValues['Description'],
            self.paramValues['LongDescription'],
            self.paramValues['Type'],
            self.paramValues['Plugin'],
            self.paramValues['AgentType'],
            self.paramValues['FileMask'],
            transformationGroup=self.paramValues['TransformationGroup'],
            groupSize=self.paramValues['GroupSize'],
            inheritedFrom=self.paramValues['InheritedFrom'],
            body=self.paramValues['Body'],
            maxTasks=self.paramValues['MaxNumberOfTasks'],
            eventsPerTask=self.paramValues['EventsPerTask'],
            addFiles=addFiles)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res['Value']
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            if not self.paramTypes.has_key(paramName):
                res = self.transClient.setTransformationParameter(
                    transID, paramName, paramValue)
                if not res['OK']:
                    gLogger.error("Failed to add parameter",
                                  "%s %s" % (paramName, res['Message']))
                    gLogger.notice(
                        "To add this parameter later please execute the following."
                    )
                    gLogger.notice("oTransformation = Transformation(%d)" %
                                   transID)
                    gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        if self.paramValues['TransformationID']:
            gLogger.info(
                "You are currently working with an active transformation definition."
            )
            gLogger.info(
                "If you wish to create a new transformation reset the TransformationID."
            )
            gLogger.info("oTransformation.reset()")
            return S_ERROR()

        requiredParameters = [
            'TransformationName', 'Description', 'LongDescription', 'Type'
        ]
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info(
                    "%s is not defined for this transformation. This is required..."
                    % parameter)
                self.paramValues[parameter] = raw_input(
                    "Please enter the value of " + parameter + " ")

        plugin = self.paramValues['Plugin']
        if not plugin in self.supportedPlugins:
            gLogger.info(
                "The selected Plugin (%s) is not known to the transformation agent."
                % plugin)
            res = self.__promptForParameter('Plugin',
                                            choices=self.supportedPlugins,
                                            default='Standard')
            if not res['OK']:
                return res
            self.paramValues['Plugin'] = res['Value']

        plugin = self.paramValues['Plugin']
        #checkPlugin = "_check%sPlugin" % plugin
        #fcn = None
        #if hasattr( self, checkPlugin ) and callable( getattr( self, checkPlugin ) ):
        #  fcn = getattr( self, checkPlugin )
        #if not fcn:
        #  return S_ERROR( "Unable to invoke %s, it isn't a member function" % checkPlugin )
        #res = fcn()
        return S_OK()

    def _checkBySizePlugin(self):
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        groupSize = self.paramValues['GroupSize']
        if (groupSize <= 0):
            gLogger.info(
                "The GroupSize was found to be less than zero. It has been set to 1."
            )
            res = self.setGroupSize(1)
            if not res['OK']:
                return res
        return S_OK()

    def _checkBroadcastPlugin(self):
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s"
            % (', '.join(['SourceSE', 'TargetSE'])))
        requiredParams = ['SourceSE', 'TargetSE']
        for requiredParam in requiredParams:
            if (not self.paramValues.has_key(requiredParam)) or (
                    not self.paramValues[requiredParam]):
                paramValue = raw_input("Please enter " + requiredParam + " ")
                setter = None
                setterName = "set%s" % requiredParam
                if hasattr(self, setterName) and callable(
                        getattr(self, setterName)):
                    setter = getattr(self, setterName)
                if not setter:
                    return S_ERROR(
                        "Unable to invoke %s, this function hasn't been implemented."
                        % setterName)
                ses = paramValue.replace(',', ' ').split()
                res = setter(ses)
                if not res['OK']:
                    return res
        return S_OK()

    def __checkSEs(self, seList):
        res = gConfig.getSections('/Resources/StorageElements')
        if not res['OK']:
            return self._errorReport(res,
                                     'Failed to get possible StorageElements')
        missing = []
        for se in seList:
            if not se in res['Value']:
                gLogger.error("StorageElement %s is not known" % se)
                missing.append(se)
        if missing:
            return S_ERROR("%d StorageElements not known" % len(missing))
        return S_OK()

    def __promptForParameter(self,
                             parameter,
                             choices=[],
                             default='',
                             insert=True):
        res = promptUser("Please enter %s" % parameter,
                         choices=choices,
                         default=default)
        if not res['OK']:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res['Value']))
        paramValue = res['Value']
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(
                    self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!"
                )
            res = setter(paramValue)
            if not res['OK']:
                return res
        return S_OK(paramValue)
Exemplo n.º 29
0
    gLogger.error("Production ID is 0 or Tasks is 0, cannot be")
    dexit(1)
    
  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
  tc = TransformationClient()
  res = tc.getTransformation(clip.prod)
  trans= res['Value']
  transp = trans['Plugin']
  if transp != 'Limited':
    gLogger.error("This cannot be used on productions that are not using the 'Limited' plugin")
    dexit(0)
  
  gLogger.info("Prod %s has %s tasks registered" % (clip.prod, trans['MaxNumberOfTasks']) )
  if clip.tasks >0:
    max_tasks = trans['MaxNumberOfTasks'] + clip.tasks  
    groupsize = trans['GroupSize']
    gLogger.notice("Adding %s tasks (%s file(s)) to production %s" %(clip.tasks, clip.tasks*groupsize, clip.prod))
  elif clip.tasks <0:
    max_tasks = -1
    gLogger.notice("Now all existing files in the DB for production %s will be processed." % clip.prod)
  else:
    gLogger.error("Number of tasks must be different from 0")
    dexit(1)
  res = tc.setTransformationParameter(clip.prod, 'MaxNumberOfTasks', max_tasks)
  if not res['OK']:
    gLogger.error(res['Message'])
    dexit(1)
  gLogger.notice("Production %s extended!" % clip.prod)
    
  dexit(0)
Exemplo n.º 30
0
class Transformation(API):

    #############################################################################
    def __init__(self, transID=0, transClient=None):
        """ c'tor
    """
        super(Transformation, self).__init__()

        self.paramTypes = {
            'TransformationID': [types.IntType, types.LongType],
            'TransformationName': types.StringTypes,
            'Status': types.StringTypes,
            'Description': types.StringTypes,
            'LongDescription': types.StringTypes,
            'Type': types.StringTypes,
            'Plugin': types.StringTypes,
            'AgentType': types.StringTypes,
            'FileMask': types.StringTypes,
            'TransformationGroup': types.StringTypes,
            'GroupSize': [types.IntType, types.LongType, types.FloatType],
            'InheritedFrom': [types.IntType, types.LongType],
            'Body': types.StringTypes,
            'MaxNumberOfTasks': [types.IntType, types.LongType],
            'EventsPerTask': [types.IntType, types.LongType]
        }
        self.paramValues = {
            'TransformationID': 0,
            'TransformationName': '',
            'Status': 'New',
            'Description': '',
            'LongDescription': '',
            'Type': '',
            'Plugin': 'Standard',
            'AgentType': 'Manual',
            'FileMask': '',
            'TransformationGroup': 'General',
            'GroupSize': 1,
            'InheritedFrom': 0,
            'Body': '',
            'MaxNumberOfTasks': 0,
            'EventsPerTask': 0
        }
        self.ops = Operations()
        self.supportedPlugins = self.ops.getValue(
            'Transformations/AllowedPlugins',
            ['Broadcast', 'Standard', 'BySize', 'ByShare'])
        if not transClient:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        self.serverURL = self.transClient.getServer()
        self.exists = False
        if transID:
            self.paramValues['TransformationID'] = transID
            res = self.getTransformation()
            if res['OK']:
                self.exists = True
            elif res['Message'] == 'Transformation does not exist':
                raise AttributeError('TransformationID %d does not exist' %
                                     transID)
            else:
                self.paramValues['TransformationID'] = 0
                gLogger.fatal(
                    "Failed to get transformation from database",
                    "%s @ %s" % (transID, self.transClient.serverURL))

    def setServer(self, server):
        self.serverURL = server
        self.transClient.setServer(self.serverURL)

    def getServer(self):
        return self.serverURL

    def reset(self, transID=0):
        self.__init__(transID)
        self.transClient.setServer(self.serverURL)
        return S_OK()

    def setTargetSE(self, seList):
        return self.__setSE('TargetSE', seList)

    def setSourceSE(self, seList):
        return self.__setSE('SourceSE', seList)

    def setBody(self, body):
        """ check that the body is a string, or using the proper syntax for multiple operations

    :param body: transformation body, for example

      .. code :: python

        body = [ ( "ReplicateAndRegister", { "SourceSE":"FOO-SRM", "TargetSE":"BAR-SRM" }),
                 ( "RemoveReplica", { "TargetSE":"FOO-SRM" } ),
               ]

    :type body: string or list of tuples (or lists) of string and dictionaries
    :raises TypeError: If the structure is not as expected
    :raises ValueError: If unknown attribute for the :class:`~DIRAC.RequestManagementSystem.Client.Operation.Operation` is used
    :returns: S_OK, S_ERROR
    """
        self.item_called = "Body"
        if isinstance(body, basestring):
            return self.__setParam(body)
        if not isinstance(body, (list, tuple)):
            raise TypeError("Expected list or string, but %r is %s" %
                            (body, type(body)))

        for tup in body:
            if not isinstance(tup, (tuple, list)):
                raise TypeError("Expected tuple or list, but %r is %s" %
                                (tup, type(tup)))
            if len(tup) != 2:
                raise TypeError("Expected 2-tuple, but %r is length %d" %
                                (tup, len(tup)))
            if not isinstance(tup[0], basestring):
                raise TypeError(
                    "Expected string, but first entry in tuple %r is %s" %
                    (tup, type(tup[0])))
            if not isinstance(tup[1], dict):
                raise TypeError(
                    "Expected dictionary, but second entry in tuple %r is %s" %
                    (tup, type(tup[0])))
            for par, val in tup[1].iteritems():
                if not isinstance(par, basestring):
                    raise TypeError(
                        "Expected string, but key in dictionary %r is %s" %
                        (par, type(par)))
                if not par in Operation.ATTRIBUTE_NAMES:
                    raise ValueError("Unknown attribute for Operation: %s" %
                                     par)
                if not isinstance(
                        val,
                    (basestring, int, long, float, list, tuple, dict)):
                    raise TypeError("Cannot encode %r, in json" % (val))
            return self.__setParam(json.dumps(body))

    def __setSE(self, seParam, seList):
        if isinstance(seList, basestring):
            try:
                seList = eval(seList)
            except BaseException:
                seList = seList.split(',')
        elif isinstance(seList, (list, dict, tuple)):
            seList = list(seList)
        else:
            return S_ERROR("Bad argument type")
        res = self.__checkSEs(seList)
        if not res['OK']:
            return res
        self.item_called = seParam
        return self.__setParam(seList)

    def __getattr__(self, name):
        if name.find('get') == 0:
            item = name[3:]
            self.item_called = item
            return self.__getParam
        if name.find('set') == 0:
            item = name[3:]
            self.item_called = item
            return self.__setParam
        raise AttributeError(name)

    def __getParam(self):
        if self.item_called == 'Available':
            return S_OK(self.paramTypes.keys())
        if self.item_called == 'Parameters':
            return S_OK(self.paramValues)
        if self.item_called in self.paramValues:
            return S_OK(self.paramValues[self.item_called])
        raise AttributeError("Unknown parameter for transformation: %s" %
                             self.item_called)

    def __setParam(self, value):
        change = False
        if self.item_called in self.paramTypes:
            if self.paramValues[self.item_called] != value:
                if type(value) in self.paramTypes[self.item_called]:
                    change = True
                else:
                    raise TypeError("%s %s %s expected one of %s" %
                                    (self.item_called, value, type(value),
                                     self.paramTypes[self.item_called]))
        else:
            if self.item_called not in self.paramValues:
                change = True
            else:
                if self.paramValues[self.item_called] != value:
                    change = True
        if not change:
            gLogger.verbose("No change of parameter %s required" %
                            self.item_called)
        else:
            gLogger.verbose("Parameter %s to be changed" % self.item_called)
            transID = self.paramValues['TransformationID']
            if self.exists and transID:
                res = self.transClient.setTransformationParameter(
                    transID, self.item_called, value)
                if not res['OK']:
                    return res
            self.paramValues[self.item_called] = value
        return S_OK()

    def getTransformation(self, printOutput=False):
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformation(transID, extraParams=True)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transParams = res['Value']
        for paramName, paramValue in transParams.items():
            setter = None
            setterName = "set%s" % paramName
            if hasattr(self, setterName) and callable(getattr(
                    self, setterName)):
                setter = getattr(self, setterName)
            if not setterName:
                gLogger.error(
                    "Unable to invoke setter %s, it isn't a member function" %
                    setterName)
                continue
            setter(paramValue)
        if printOutput:
            gLogger.info("No printing available yet")
        return S_OK(transParams)

    def getTransformationLogging(self, printOutput=False):
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        res = self.transClient.getTransformationLogging(transID)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        loggingList = res['Value']
        if printOutput:
            self._printFormattedDictList(
                loggingList, ['Message', 'MessageDate', 'AuthorDN'],
                'MessageDate', 'MessageDate')
        return S_OK(loggingList)

    def extendTransformation(self, nTasks, printOutput=False):
        return self.__executeOperation('extendTransformation',
                                       nTasks,
                                       printOutput=printOutput)

    def cleanTransformation(self, printOutput=False):
        res = self.__executeOperation('cleanTransformation',
                                      printOutput=printOutput)
        if res['OK']:
            self.paramValues['Status'] = 'Cleaned'
        return res

    def deleteTransformation(self, printOutput=False):
        res = self.__executeOperation('deleteTransformation',
                                      printOutput=printOutput)
        if res['OK']:
            self.reset()
        return res

    def addFilesToTransformation(self, lfns, printOutput=False):
        return self.__executeOperation('addFilesToTransformation',
                                       lfns,
                                       printOutput=printOutput)

    def setFileStatusForTransformation(self, status, lfns, printOutput=False):
        return self.__executeOperation('setFileStatusForTransformation',
                                       status,
                                       lfns,
                                       printOutput=printOutput)

    def getTransformationTaskStats(self, printOutput=False):
        return self.__executeOperation('getTransformationTaskStats',
                                       printOutput=printOutput)

    def getTransformationStats(self, printOutput=False):
        return self.__executeOperation('getTransformationStats',
                                       printOutput=printOutput)

    def deleteTasks(self, taskMin, taskMax, printOutput=False):
        return self.__executeOperation('deleteTasks',
                                       taskMin,
                                       taskMax,
                                       printOutput=printOutput)

    def addTaskForTransformation(self,
                                 lfns=[],
                                 se='Unknown',
                                 printOutput=False):
        return self.__executeOperation('addTaskForTransformation',
                                       lfns,
                                       se,
                                       printOutput=printOutput)

    def setTaskStatus(self, taskID, status, printOutput=False):
        return self.__executeOperation('setTaskStatus',
                                       taskID,
                                       status,
                                       printOutput=printOutput)

    def __executeOperation(self, operation, *parms, **kwds):
        transID = self.paramValues['TransformationID']
        if not transID:
            gLogger.fatal("No TransformationID known")
            return S_ERROR()
        printOutput = kwds.pop('printOutput')
        fcn = None
        if hasattr(self.transClient, operation) and callable(
                getattr(self.transClient, operation)):
            fcn = getattr(self.transClient, operation)
        if not fcn:
            return S_ERROR(
                "Unable to invoke %s, it isn't a member funtion of TransformationClient"
            )
        res = fcn(transID, *parms, **kwds)
        if printOutput:
            self._prettyPrint(res)
        return res

    def getTransformationFiles(self,
                               fileStatus=[],
                               lfns=[],
                               outputFields=[
                                   'FileID', 'LFN', 'Status', 'TaskID',
                                   'TargetSE', 'UsedSE', 'ErrorCount',
                                   'InsertedTime', 'LastUpdate'
                               ],
                               orderBy='FileID',
                               printOutput=False):
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if fileStatus:
            condDict['Status'] = fileStatus
        if lfns:
            condDict['LFN'] = lfns
        res = self.transClient.getTransformationFiles(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'FileID', orderBy)
        return res

    def getTransformationTasks(self,
                               taskStatus=[],
                               taskIDs=[],
                               outputFields=[
                                   'TransformationID', 'TaskID',
                                   'ExternalStatus', 'ExternalID', 'TargetSE',
                                   'CreationTime', 'LastUpdateTime'
                               ],
                               orderBy='TaskID',
                               printOutput=False):
        condDict = {'TransformationID': self.paramValues['TransformationID']}
        if taskStatus:
            condDict['ExternalStatus'] = taskStatus
        if taskIDs:
            condDict['TaskID'] = taskIDs
        res = self.transClient.getTransformationTasks(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TaskID', orderBy)
        return res

    #############################################################################
    def getTransformations(self,
                           transID=[],
                           transStatus=[],
                           outputFields=[
                               'TransformationID', 'Status', 'AgentType',
                               'TransformationName', 'CreationDate'
                           ],
                           orderBy='TransformationID',
                           printOutput=False):
        condDict = {}
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def getAuthorDNfromProxy(self):
        """ gets the AuthorDN and username of the transformation from the uploaded proxy
    """
        username = ""
        author = ""
        res = getProxyInfo()
        if res['OK']:
            author = res['Value']['identity']
            username = res['Value']['username']
        else:
            gLogger.error("Unable to get uploaded proxy Info %s " %
                          res['Message'])
            return S_ERROR(res['Message'])

        res = {'username': username, 'authorDN': author}
        return S_OK(res)

    #############################################################################
    def getTransformationsByUser(self,
                                 authorDN="",
                                 userName="",
                                 transID=[],
                                 transStatus=[],
                                 outputFields=[
                                     'TransformationID', 'Status', 'AgentType',
                                     'TransformationName', 'CreationDate',
                                     'AuthorDN'
                                 ],
                                 orderBy='TransformationID',
                                 printOutput=False):
        condDict = {}
        if authorDN == "":
            res = self.getAuthorDNfromProxy()
            if not res['OK']:
                gLogger.error(res['Message'])
                return S_ERROR(res['Message'])
            else:
                foundUserName = res['Value']['username']
                foundAuthor = res['Value']['authorDN']
                # If the username whom created the uploaded proxy is different than the provided username report error and exit
                if not (userName == "" or userName == foundUserName):
                    gLogger.error(
                        "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                        % (userName, foundUserName))
                    return S_ERROR(
                        "Couldn't resolve the authorDN for user '%s' from the uploaded proxy (proxy created by '%s')"
                        % (userName, foundUserName))

                userName = foundUserName
                authorDN = foundAuthor
                gLogger.info(
                    "Will list transformations created by user '%s' with status '%s'"
                    % (userName, ', '.join(transStatus)))
        else:
            gLogger.info(
                "Will list transformations created by '%s' with status '%s'" %
                (authorDN, ', '.join(transStatus)))

        condDict['AuthorDN'] = authorDN
        if transID:
            condDict['TransformationID'] = transID
        if transStatus:
            condDict['Status'] = transStatus
        res = self.transClient.getTransformations(condDict=condDict)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res

        if printOutput:
            if not outputFields:
                gLogger.info("Available fields are: %s" %
                             res['ParameterNames'].join(' '))
            elif not res['Value']:
                gLogger.info("No tasks found for selection")
            else:
                self._printFormattedDictList(res['Value'], outputFields,
                                             'TransformationID', orderBy)
        return res

    #############################################################################
    def getSummaryTransformations(self, transID=[]):
        """Show the summary for a list of Transformations

       Fields starting with 'F' ('J')  refers to files (jobs).
       Proc. stand for processed.
    """
        condDict = {'TransformationID': transID}
        orderby = []
        start = 0
        maxitems = len(transID)
        paramShowNames = ['TransformationID', 'Type', 'Status', 'Files_Total', 'Files_PercentProcessed', \
                          'Files_Processed', 'Files_Unused', 'Jobs_TotalCreated', 'Jobs_Waiting', \
                          'Jobs_Running', 'Jobs_Done', 'Jobs_Failed', 'Jobs_Stalled']
        # Below, the header used for each field in the printing: short to fit in one line
        paramShowNamesShort = ['TransID', 'Type', 'Status', 'F_Total', 'F_Proc.(%)', 'F_Proc.', \
                               'F_Unused', 'J_Created', 'J_Wait', 'J_Run', 'J_Done', 'J_Fail', 'J_Stalled']
        dictList = []

        result = self.transClient.getTransformationSummaryWeb(
            condDict, orderby, start, maxitems)
        if not result['OK']:
            self._prettyPrint(result)
            return result

        if result['Value']['TotalRecords'] > 0:
            try:
                paramNames = result['Value']['ParameterNames']
                for paramValues in result['Value']['Records']:
                    paramShowValues = map(
                        lambda pname: paramValues[paramNames.index(pname)],
                        paramShowNames)
                    showDict = dict(zip(paramShowNamesShort, paramShowValues))
                    dictList.append(showDict)

            except Exception as x:
                print 'Exception %s ' % str(x)

        if not len(dictList) > 0:
            gLogger.error(
                'No found transformations satisfying input condition')
            return S_ERROR(
                'No found transformations satisfying input condition')
        else:
            print self._printFormattedDictList(dictList, paramShowNamesShort,
                                               paramShowNamesShort[0],
                                               paramShowNamesShort[0])

        return S_OK(dictList)

    #############################################################################
    def addTransformation(self, addFiles=True, printOutput=False):
        res = self._checkCreation()
        if not res['OK']:
            return self._errorReport(res, 'Failed transformation sanity check')
        if printOutput:
            gLogger.info(
                "Will attempt to create transformation with the following parameters"
            )
            self._prettyPrint(self.paramValues)

        res = self.transClient.addTransformation(
            self.paramValues['TransformationName'],
            self.paramValues['Description'],
            self.paramValues['LongDescription'],
            self.paramValues['Type'],
            self.paramValues['Plugin'],
            self.paramValues['AgentType'],
            self.paramValues['FileMask'],
            transformationGroup=self.paramValues['TransformationGroup'],
            groupSize=self.paramValues['GroupSize'],
            inheritedFrom=self.paramValues['InheritedFrom'],
            body=self.paramValues['Body'],
            maxTasks=self.paramValues['MaxNumberOfTasks'],
            eventsPerTask=self.paramValues['EventsPerTask'],
            addFiles=addFiles)
        if not res['OK']:
            if printOutput:
                self._prettyPrint(res)
            return res
        transID = res['Value']
        self.exists = True
        self.setTransformationID(transID)
        gLogger.notice("Created transformation %d" % transID)
        for paramName, paramValue in self.paramValues.items():
            if paramName not in self.paramTypes:
                res = self.transClient.setTransformationParameter(
                    transID, paramName, paramValue)
                if not res['OK']:
                    gLogger.error("Failed to add parameter",
                                  "%s %s" % (paramName, res['Message']))
                    gLogger.notice(
                        "To add this parameter later please execute the following."
                    )
                    gLogger.notice("oTransformation = Transformation(%d)" %
                                   transID)
                    gLogger.notice("oTransformation.set%s(...)" % paramName)
        return S_OK(transID)

    def _checkCreation(self):
        """ Few checks
    """
        if self.paramValues['TransformationID']:
            gLogger.info(
                "You are currently working with an active transformation definition."
            )
            gLogger.info(
                "If you wish to create a new transformation reset the TransformationID."
            )
            gLogger.info("oTransformation.reset()")
            return S_ERROR()

        requiredParameters = [
            'TransformationName', 'Description', 'LongDescription', 'Type'
        ]
        for parameter in requiredParameters:
            if not self.paramValues[parameter]:
                gLogger.info(
                    "%s is not defined for this transformation. This is required..."
                    % parameter)
                self.paramValues[parameter] = raw_input(
                    "Please enter the value of " + parameter + " ")

        plugin = self.paramValues['Plugin']
        if plugin:
            if not plugin in self.supportedPlugins:
                gLogger.info(
                    "The selected Plugin (%s) is not known to the transformation agent."
                    % plugin)
                res = self.__promptForParameter('Plugin',
                                                choices=self.supportedPlugins,
                                                default='Standard')
                if not res['OK']:
                    return res
                self.paramValues['Plugin'] = res['Value']

        plugin = self.paramValues['Plugin']

        return S_OK()

    def _checkBySizePlugin(self):
        return self._checkStandardPlugin()

    def _checkBySharePlugin(self):
        return self._checkStandardPlugin()

    def _checkStandardPlugin(self):
        groupSize = self.paramValues['GroupSize']
        if groupSize <= 0:
            gLogger.info(
                "The GroupSize was found to be less than zero. It has been set to 1."
            )
            res = self.setGroupSize(1)
            if not res['OK']:
                return res
        return S_OK()

    def _checkBroadcastPlugin(self):
        gLogger.info(
            "The Broadcast plugin requires the following parameters be set: %s"
            % (', '.join(['SourceSE', 'TargetSE'])))
        requiredParams = ['SourceSE', 'TargetSE']
        for requiredParam in requiredParams:
            if not self.paramValues.get(requiredParam):
                paramValue = raw_input("Please enter " + requiredParam + " ")
                setter = None
                setterName = "set%s" % requiredParam
                if hasattr(self, setterName) and callable(
                        getattr(self, setterName)):
                    setter = getattr(self, setterName)
                if not setter:
                    return S_ERROR(
                        "Unable to invoke %s, this function hasn't been implemented."
                        % setterName)
                ses = paramValue.replace(',', ' ').split()
                res = setter(ses)
                if not res['OK']:
                    return res
        return S_OK()

    def __checkSEs(self, seList):
        res = gConfig.getSections('/Resources/StorageElements')
        if not res['OK']:
            return self._errorReport(res,
                                     'Failed to get possible StorageElements')
        missing = set(seList) - set(res['Value'])
        if missing:
            for se in missing:
                gLogger.error("StorageElement %s is not known" % se)
            return S_ERROR("%d StorageElements not known" % len(missing))
        return S_OK()

    def __promptForParameter(self,
                             parameter,
                             choices=[],
                             default='',
                             insert=True):
        res = promptUser("Please enter %s" % parameter,
                         choices=choices,
                         default=default)
        if not res['OK']:
            return self._errorReport(res)
        gLogger.notice("%s will be set to '%s'" % (parameter, res['Value']))
        paramValue = res['Value']
        if insert:
            setter = None
            setterName = "set%s" % parameter
            if hasattr(self, setterName) and callable(getattr(
                    self, setterName)):
                setter = getattr(self, setterName)
            if not setter:
                return S_ERROR(
                    "Unable to invoke %s, it isn't a member function of Transformation!"
                )
            res = setter(paramValue)
            if not res['OK']:
                return res
        return S_OK(paramValue)
Exemplo n.º 31
0
class TransformationCLI( cmd.Cmd, API ):

  def __init__( self ):
    self.server = TransformationClient()
    self.indentSpace = 4
    cmd.Cmd.__init__( self )
    API.__init__( self )

  def printPair( self, key, value, separator = ":" ):
    valueList = value.split( "\n" )
    print "%s%s%s %s" % ( key, " " * ( self.indentSpace - len( key ) ), separator, valueList[0].strip() )
    for valueLine in valueList[ 1:-1 ]:
      print "%s  %s" % ( " " * self.indentSpace, valueLine.strip() )

  def do_exit( self, args ):
    """ Exits the shell.
        usage: exit
    """
    sys.exit( 0 )

  def do_quit( self, *args ):
    """ Exits the shell.
        Usage: quit
    """
    sys.exit( 0 )

  def do_help( self, args ):
    """ Default version of the help command
       Usage: help <command>
       OR use helpall to see description for all commands"""
    cmd.Cmd.do_help( self, args )

  # overriting default help command
  def do_helpall( self, args ):
    """
    Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
    """
    if len( args ) == 0:
      print "\nAvailable commands:\n"
      attrList = dir( self )
      attrList.sort()
      for attribute in attrList:
        if attribute.find( "do_" ) == 0:
          self.printPair( attribute[ 3: ], getattr( self, attribute ).__doc__[ 1: ] )
          print ""
    else:
      command = args.split()[0].strip()
      try:
        obj = getattr( self, "do_%s" % command )
      except:
        print "There's no such %s command" % command
        return
      self.printPair( command, obj.__doc__[1:] )

  def do_shell( self, args ):
    """Execute a shell command

       usage !<shell_command>
    """
    comm = args
    res = shellCall( 0, comm )
    if res['OK'] and res['Value'][0] == 0:
      _returnCode, stdOut, stdErr = res['Value']
      print "%s\n%s" % ( stdOut, stdErr )
    else:
      print res['Message']

  def check_params( self, args, num ):
    """Checks if the number of parameters correct"""
    argss = args.split()
    length = len( argss )
    if length < num:
      print "Error: Number of arguments provided %d less that required %d, please correct." % ( length, num )
      return ( False, length )
    return ( argss, length )

  def check_id_or_name( self, id_or_name ):
    """resolve name or Id by converting type of argument """
    if id_or_name.isdigit():
      return long( id_or_name ) # its look like id
    return id_or_name

  ####################################################################
  #
  # These are the methods for transformation manipulation
  #

  def do_getall( self, args ):
    """Get transformation details

       usage: getall [Status] [Status]
    """
    oTrans = Transformation()
    oTrans.getTransformations( transStatus = args.split(), printOutput = True )

  def do_getAllByUser( self, args ):
    """Get all transformations created by a given user

The first argument is the authorDN or username. The authorDN
is preferred: it need to be inside quotes because contains
white spaces. Only authorDN should be quoted.

When the username is provided instead, 
the authorDN is retrieved from the uploaded proxy,
so that the retrieved transformations are those created by
the user who uploaded that proxy: that user could be different
that the username provided to the function.

       usage: getAllByUser authorDN or username [Status] [Status]
    """
    oTrans = Transformation()
    argss = args.split()
    username = ""
    author = ""
    status = []
    if not len( argss ) > 0:
      print self.do_getAllByUser.__doc__
      return

    # if the user didnt quoted the authorDN ends
    if '=' in argss[0] and argss[0][0] not in ["'", '"']:
      print "AuthorDN need to be quoted (just quote that argument)"
      return

    if argss[0][0] in ["'", '"']: # authorDN given
      author = argss[0]
      status_idx = 1
      for arg in argss[1:]:
        author += ' ' + arg
        status_idx +=1
        if arg[-1] in ["'", '"']:
          break
      # At this point we should have something like 'author'
      if not author[0] in ["'", '"'] or not author[-1] in ["'", '"']:
        print "AuthorDN need to be quoted (just quote that argument)"
        return
      else:
        author = author[1:-1] # throw away the quotes
      # the rest are the requested status
      status = argss[ status_idx: ]
    else: # username given
      username = argss[0]
      status = argss[ 1: ]

    oTrans.getTransformationsByUser( authorDN = author, userName = username, transStatus = status, printOutput = True )

  def do_summaryTransformations( self, args ):
    """Show the summary for a list of Transformations

    Fields starting with 'F' ('J')  refers to files (jobs).
    Proc. stand for processed.

        Usage: summaryTransformations <ProdID> [<ProdID> ...]
    """
    argss = args.split()
    if not len( argss ) > 0:
      print self.do_summaryTransformations.__doc__
      return

    transid = argss
    oTrans = Transformation()
    oTrans.getSummaryTransformations( transID = transid )

  def do_getStatus( self, args ):
    """Get transformation details

       usage: getStatus <transName|ID>
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    for transName in argss:
      res = self.server.getTransformation( transName )
      if not res['OK']:
        print "Getting status of %s failed: %s" % ( transName, res['Message'] )
      else:
        print "%s: %s" % ( transName, res['Value']['Status'] )

  def do_setStatus( self, args ):
    """Set transformation status

       usage: setStatus  <Status> <transName|ID>
       Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
    """
    argss = args.split()
    if not len( argss ) > 1:
      print "transformation and status not supplied"
      return
    status = argss[0]
    transNames = argss[1:]
    for transName in transNames:
      res = self.server.setTransformationParameter( transName, 'Status', status )
      if not res['OK']:
        print "Setting status of %s failed: %s" % ( transName, res['Message'] )
      else:
        print "%s set to %s" % ( transName, status )

  def do_start( self, args ):
    """Start transformation

       usage: start <transName|ID>
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'Status', 'Active' )
      if not res['OK']:
        print "Setting Status of %s failed: %s" % ( transName, res['Message'] )
      else:
        res = self.server.setTransformationParameter( transName, 'AgentType', 'Automatic' )
        if not res['OK']:
          print "Setting AgentType of %s failed: %s" % ( transName, res['Message'] )
        else:
          print "%s started" % transName

  def do_stop( self, args ):
    """Stop transformation

       usage: stop <transID|ID>
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'AgentType', 'Manual' )
      if not res['OK']:
        print "Stopping of %s failed: %s" % ( transName, res['Message'] )
      else:
        print "%s stopped" % transName

  def do_flush( self, args ):
    """Flush transformation

       usage: flush <transName|ID>
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'Status', 'Flush' )
      if not res['OK']:
        print "Flushing of %s failed: %s" % ( transName, res['Message'] )
      else:
        print "%s flushing" % transName

  def do_get( self, args ):
    """Get transformation definition

    usage: get <transName|ID>
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    transName = argss[0]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print "Failed to get %s: %s" % ( transName, res['Message'] )
    else:
      res['Value'].pop( 'Body' )
      printDict( res['Value'] )

  def do_getBody( self, args ):
    """Get transformation body

    usage: getBody <transName|ID>
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    transName = argss[0]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print "Failed to get %s: %s" % ( transName, res['Message'] )
    else:
      print res['Value']['Body']

  def do_getFileStat( self, args ):
    """Get transformation file statistics

     usage: getFileStat <transName|ID>
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    transName = argss[0]
    res = self.server.getTransformationStats( transName )
    if not res['OK']:
      print "Failed to get statistics for %s: %s" % ( transName, res['Message'] )
    else:
      res['Value'].pop( 'Total' )
      printDict( res['Value'] )

  def do_modMask( self, args ):
    """Modify transformation input definition

       usage: modInput <mask> <transName|ID>
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    mask = argss[0]
    transNames = argss[1:]
    for transName in transNames:
      res = self.server.setTransformationParameter( transName, "FileMask", mask )
      if not res['OK']:
        print "Failed to modify input file mask for %s: %s" % ( transName, res['Message'] )
      else:
        print "Updated %s filemask" % transName

  def do_getFiles( self, args ):
    """Get files for the transformation (optionally with a given status)

    usage: getFiles <transName|ID> [Status] [Status]
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    transName = argss[0]
    status = argss[1:]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print "Failed to get transformation information: %s" % res['Message']
    else:
      selectDict = {'TransformationID':res['Value']['TransformationID']}
      if status:
        selectDict['Status'] = status
      res = self.server.getTransformationFiles( condDict = selectDict )
      if not res['OK']:
        print "Failed to get transformation files: %s" % res['Message']
      elif res['Value']:
        self._printFormattedDictList( res['Value'], ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                      'LFN', 'LFN' )
      else:
        print "No files found"

  def do_getFileStatus( self, args ):
    """Get file(s) status for the given transformation

    usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
    """
    argss = args.split()
    if len( argss ) < 2:
      print "transformation and file not supplied"
      return
    transName = argss[0]
    lfns = argss[1:]

    res = self.server.getTransformation( transName )
    if not res['OK']:
      print "Failed to get transformation information: %s" % res['Message']
    else:
      selectDict = {'TransformationID':res['Value']['TransformationID']}
      res = self.server.getTransformationFiles( condDict = selectDict )
      if not res['OK']:
        print "Failed to get transformation files: %s" % res['Message']
      elif res['Value']:
        filesList = []
        for fileDict in res['Value']:
          if fileDict['LFN'] in lfns:
            filesList.append( fileDict )
        if  filesList:
          self._printFormattedDictList( filesList, ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'],
                                        'LFN', 'LFN' )
        else:
          print "Could not find any LFN in", lfns, "for transformation", transName
      else:
        print "No files found"

  def do_getOutputFiles( self, args ):
    """Get output files for the transformation

    usage: getOutputFiles <transName|ID>
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    transName = argss[0]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print "Failed to get transformation information: %s" % res['Message']
    else:
      fc = FileCatalog()
      meta = {}
      meta ['ProdID'] = transName
      res = fc.findFilesByMetadata( meta )
      if not res['OK']:
        print res['Message']
        return
      if not len( res['Value'] ) > 0:
        print 'No output files yet for transformation %d' %int(transName)
        return
      else:
        for lfn in res['Value']:
          print lfn

  def do_getInputDataQuery( self, args ):
    """Get input data query for the transformation

    usage: getInputDataQuery <transName|ID>
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    transName = argss[0]
    res = self.server.getTransformationInputDataQuery( transName )
    if not res['OK']:
      print "Failed to get transformation input data query: %s" % res['Message']
    else:
      print res['Value']

  def do_setFileStatus( self, args ):
    """Set file status for the given transformation

    usage: setFileStatus <transName|ID> <lfn> <status>
    """
    argss = args.split()
    if not len( argss ) == 3:
      print "transformation file and status not supplied"
      return
    transName = argss[0]
    lfn = argss[1]
    status = argss[2]
    res = self.server.setFileStatusForTransformation( transName, status, [lfn] )
    if not res['OK']:
      print "Failed to update file status: %s" % res['Message']
    else:
      print "Updated file status to %s" % status

  def do_resetFile( self, args ):
    """Reset file status for the given transformation

    usage: resetFile <transName|ID> <lfns>
    """
    argss = args.split()
    if not len( argss ) > 1:
      print "transformation and file(s) not supplied"
      return
    transName = argss[0]
    lfns = argss[1:]
    res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns )
    if not res['OK']:
      print "Failed to reset file status: %s" % res['Message']
    else:
      if 'Failed' in res['Value']:
        print "Could not reset some files: "
        for lfn, reason in res['Value']['Failed'].items():
          print lfn, reason
      else:
        print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns )

  def do_resetProcessedFile( self, args ):
    """ Reset file status for the given transformation
        usage: resetFile <transName|ID> <lfn>
    """
    argss = args.split() 
    
    if not len( argss ) > 1:
      print "transformation and file(s) not supplied"
      return
    transName = argss[0]
    lfns = argss[1:]
    res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns, force = True )
    if not res['OK']:
      print "Failed to reset file status: %s" % res['Message']
    else:
      if res['Value']['Failed']:
        print "Could not reset some files: "
        for lfn, reason in res['Value']['Failed'].items():
          print lfn, reason
      else:
        print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns )

  ####################################################################
  #
  # These are the methods for file manipulation
  #

  def do_addDirectory( self, args ):
    """Add files from the given catalog directory

    usage: addDirectory <directory> [directory]
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no directory supplied"
      return
    for directory in argss:
      res = self.server.addDirectory( directory, force = True )
      if not res['OK']:
        print 'failed to add directory %s: %s' % ( directory, res['Message'] )
      else:
        print 'added %s files for %s' % ( res['Value'], directory )

  def do_replicas( self, args ):
    """ Get replicas for <path>

        usage: replicas <lfn> [lfn]
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no files supplied"
      return
    res = self.server.getReplicas( argss )
    if not res['OK']:
      print "failed to get any replica information: %s" % res['Message']
      return
    for lfn in sorted( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to get replica information for %s: %s" % ( lfn, error )
    for lfn in sorted( res['Value']['Successful'].keys() ):
      ses = sorted( res['Value']['Successful'][lfn].keys() )
      outStr = "%s :" % lfn.ljust( 100 )
      for se in ses:
        outStr = "%s %s" % ( outStr, se.ljust( 15 ) )
      print outStr

  def do_addFile( self, args ):
    """Add new files to transformation DB

    usage: addFile <lfn> [lfn]
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no files supplied"
      return
    lfnDict = {}
    for lfn in argss:
      lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':'IGNORED-SE', 'Size':0, 'GUID':'IGNORED-GUID',
                      'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.addFile( lfnDict, force = True )
    if not res['OK']:
      print "failed to add any files: %s" % res['Message']
      return
    for lfn in sorted( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to add %s: %s" % ( lfn, error )
    for lfn in sorted( res['Value']['Successful'].keys() ):
      print "added %s" % lfn

  def do_removeFile( self, args ):
    """Remove file from transformation DB

    usage: removeFile <lfn> [lfn]
    """
    argss = args.split()
    if not len( argss ) > 0:
      print "no files supplied"
      return
    res = self.server.removeFile( argss )
    if not res['OK']:
      print "failed to remove any files: %s" % res['Message']
      return
    for lfn in sorted( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to remove %s: %s" % ( lfn, error )
    for lfn in sorted( res['Value']['Successful'].keys() ):
      print "removed %s" % lfn

  def do_addReplica( self, args ):
    """ Add new replica to the transformation DB

    usage: addReplica <lfn> <se>
    """
    argss = args.split()
    if not len( argss ) == 2:
      print "no file info supplied"
      return
    lfn = argss[0]
    se = argss[1]
    lfnDict = {}
    lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.addReplica( lfnDict, force = True )
    if not res['OK']:
      print "failed to add replica: %s" % res['Message']
      return
    for lfn in sorted( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to add replica: %s" % ( error )
    for lfn in sorted( res['Value']['Successful'].keys() ):
      print "added %s" % lfn

  def do_removeReplica( self, args ):
    """Remove replica from the transformation DB

    usage: removeReplica <lfn> <se>
    """
    argss = args.split()
    if not len( argss ) == 2:
      print "no file info supplied"
      return
    lfn = argss[0]
    se = argss[1]
    lfnDict = {}
    lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.removeReplica( lfnDict )
    if not res['OK']:
      print "failed to remove replica: %s" % res['Message']
      return
    for lfn in sorted( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to remove replica: %s" % ( error )
    for lfn in sorted( res['Value']['Successful'].keys() ):
      print "removed %s" % lfn

  def do_setReplicaStatus( self, args ):
    """Set replica status, usually used to mark a replica Problematic

    usage: setReplicaStatus <lfn> <status> <se>
    """
    argss = args.split()
    if not len( argss ) > 2:
      print "no file info supplied"
      return
    lfn = argss[0]
    status = argss[1]
    se = argss[2]
    lfnDict = {}
    lfnDict[lfn] = {'Status':status, 'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.setReplicaStatus( lfnDict )
    if not res['OK']:
      print "failed to set replica status: %s" % res['Message']
      return
    for lfn in sorted( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to set replica status: %s" % ( error )
    for lfn in sorted( res['Value']['Successful'].keys() ):
      print "updated replica status %s" % lfn
Exemplo n.º 32
0
class TransformationCLI( cmd.Cmd, API ):

  def __init__( self ):
    self.server = TransformationClient()
    self.indentSpace = 4
    cmd.Cmd.__init__( self )
    API.__init__( self )

  def printPair( self, key, value, separator = ":" ):
    valueList = value.split( "\n" )
    print "%s%s%s %s" % ( key, " " * ( self.indentSpace - len( key ) ), separator, valueList[0].strip() )
    for valueLine in valueList[ 1:-1 ]:
      print "%s  %s" % ( " " * self.indentSpace, valueLine.strip() )

  def do_exit( self, args ):
    """ Exits the shell.
        usage: exit
    """
    sys.exit( 0 )

  def do_quit( self, *args ):
    """ Exits the shell.
        Usage: quit
    """
    sys.exit( 0 )

  def do_help( self, args ):
    """ Default version of the help command
       Usage: help <command>
       OR use helpall to see description for all commans"""
    cmd.Cmd.do_help( self, args )

  # overriting default help command
  def do_helpall( self, args ):
    """
    Shows help information
        Usage: helpall <command>
        If no command is specified all commands are shown
    """
    if len( args ) == 0:
      print "\nAvailable commands:\n"
      attrList = dir( self )
      attrList.sort()
      for attribute in attrList:
        if attribute.find( "do_" ) == 0:
          self.printPair( attribute[ 3: ], getattr( self, attribute ).__doc__[ 1: ] )
          print ""
    else:
      command = args.split()[0].strip()
      try:
        obj = getattr( self, "do_%s" % command )
      except:
        print "There's no such %s command" % command
        return
      self.printPair( command, obj.__doc__[1:] )

  def do_shell( self, args ):
    """Execute a shell command

       usage !<shell_command>
    """
    comm = args
    res = shellCall( 0, comm )
    if res['OK'] and res['Value'][0] == 0:
      returnCode, stdOut, stdErr = res['Value']
      print "%s\n%s" % ( stdOut, stdErr )
    else:
      print res['Message']

  def check_params( self, args, num ):
    """Checks if the number of parameters correct"""
    argss = string.split( args )
    length = len( argss )
    if length < num:
      print "Error: Number of arguments provided %d less that required %d, please correct." % ( length, num )
      return ( False, length )
    return ( argss, length )

  def check_id_or_name( self, id_or_name ):
    """resolve name or Id by converting type of argument """
    if id_or_name.isdigit():
      return long( id_or_name ) # its look like id
    return id_or_name

  def do_setServer( self, args ):
    """ Set the destination server

        usage: setServer serverURL
    """
    argss = string.split( args )
    if len( argss ) == 0:
      print "no server provided"
    self.serverURL = argss[0]
    self.server.setServer( self.serverURL )

  ####################################################################
  #
  # These are the methods for transformation manipulation
  #

  def do_getall( self, args ):
    """Get transformation details

       usage: getall [Status] [Status]
    """
    oTrans = Transformation()
    oTrans.setServer( self.serverURL )
    oTrans.getTransformations( transStatus = string.split( args ), printOutput = True )

  def do_getStatus( self, args ):
    """Get transformation details

       usage: getStatus <transName|ID>
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    for transName in argss:
      res = self.server.getTransformation( transName )
      if not res['OK']:
        print "Getting status of %s failed: %s" % ( transName, res['Message'] )
      else:
        print "%s: %s" % ( transName, res['Value']['Status'] )

  def do_setStatus( self, args ):
    """Set transformation status

       usage: setStatus  <Status> <transName|ID>
       Status <'New' 'Active' 'Stopped' 'Completed' 'Cleaning'>
    """
    argss = string.split( args )
    if not len( argss ) > 1:
      print "transformation and status not supplied"
      return
    status = argss[0]
    transNames = argss[1:]
    for transName in transNames:
      res = self.server.setTransformationParameter( transName, 'Status', status )
      if not res['OK']:
        print "Setting status of %s failed: %s" % ( transName, res['Message'] )
      else:
        print "%s set to %s" % ( transName, status )

  def do_start( self, args ):
    """Start transformation

       usage: start <transName|ID>
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'Status', 'Active' )
      if not res['OK']:
        print "Setting Status of %s failed: %s" % ( transName, res['Message'] )
      else:
        res = self.server.setTransformationParameter( transName, 'AgentType', 'Automatic' )
        if not res['OK']:
          print "Setting AgentType of %s failed: %s" % ( transName, res['Message'] )
        else:
          print "%s started" % transName

  def do_stop( self, args ):
    """Stop transformation

       usage: stop <transID|ID>
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'AgentType', 'Manual' )
      if not res['OK']:
        print "Stopping of %s failed: %s" % ( transName, res['Message'] )
      else:
        print "%s stopped" % transName

  def do_flush( self, args ):
    """Flush transformation

       usage: flush <transName|ID>
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    for transName in argss:
      res = self.server.setTransformationParameter( transName, 'Status', 'Flush' )
      if not res['OK']:
        print "Flushing of %s failed: %s" % ( transName, res['Message'] )
      else:
        print "%s flushing" % transName

  def do_get( self, args ):
    """Get transformation definition

    usage: get <transName|ID>
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    transName = argss[0]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print "Failed to get %s: %s" % ( transName, res['Message'] )
    else:
      res['Value'].pop( 'Body' )
      printDict( res['Value'] )

  def do_getBody( self, args ):
    """Get transformation body

    usage: getBody <transName|ID>
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    transName = argss[0]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print "Failed to get %s: %s" % ( transName, res['Message'] )
    else:
      print res['Value']['Body']

  def do_getFileStat( self, args ):
    """Get transformation file statistics

     usage: getFileStat <transName|ID>
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    transName = argss[0]
    res = self.server.getTransformationStats( transName )
    if not res['OK']:
      print "Failed to get statistics for %s: %s" % ( transName, res['Message'] )
    else:
      res['Value'].pop( 'Total' )
      printDict( res['Value'] )

  def do_modMask( self, args ):
    """Modify transformation input definition

       usage: modInput <mask> <transName|ID>
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    mask = argss[0]
    transNames = argss[1:]
    for transName in transNames:
      res = self.server.setTransformationParameter( transName, "FileMask", mask )
      if not res['OK']:
        print "Failed to modify input file mask for %s: %s" % ( transName, res['Message'] )
      else:
        print "Updated %s filemask" % transName

  def do_getFiles( self, args ):
    """Get files for the transformation (optionally with a given status)

    usage: getFiles <transName|ID> [Status] [Status]
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no transformation supplied"
      return
    transName = argss[0]
    status = argss[1:]
    res = self.server.getTransformation( transName )
    if not res['OK']:
      print "Failed to get transformation information: %s" % res['Message']
    else:
      selectDict = {'TransformationID':res['Value']['TransformationID']}
      if status:
        selectDict['Status'] = status
      res = self.server.getTransformationFiles( condDict = selectDict )
      if not res['OK']:
        print "Failed to get transformation files: %s" % res['Message']
      elif res['Value']:
        self._printFormattedDictList( res['Value'], ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'], 'LFN', 'LFN' )
      else:
        print "No files found"

  def do_getFileStatus( self, args ):
    """Get file(s) status for the given transformation

    usage: getFileStatus <transName|ID> <lfn> [<lfn>...]
    """
    argss = string.split( args )
    if len( argss ) < 2:
      print "transformation and file not supplied"
      return
    transName = argss[0]
    lfns = argss[1:]

    res = self.server.getTransformation( transName )
    if not res['OK']:
      print "Failed to get transformation information: %s" % res['Message']
    else:
      selectDict = {'TransformationID':res['Value']['TransformationID']}
      res = self.server.getTransformationFiles( condDict = selectDict )
      if not res['OK']:
        print "Failed to get transformation files: %s" % res['Message']
      elif res['Value']:
        filesList = []
        for fileDict in res['Value']:
          if fileDict['LFN'] in lfns:
            filesList.append( fileDict )
        if  filesList:
          self._printFormattedDictList( filesList, ['LFN', 'Status', 'ErrorCount', 'TargetSE', 'LastUpdate'], 'LFN', 'LFN' )
        else:
          print "Could not find any LFN in", lfns, "for transformation", transName
      else:
        print "No files found"

  def do_setFileStatus( self, args ):
    """Set file status for the given transformation

    usage: setFileStatus <transName|ID> <lfn> <status>
    """
    argss = string.split( args )
    if not len( argss ) == 3:
      print "transformation file and status not supplied"
      return
    transName = argss[0]
    lfn = argss[1]
    status = argss[2]
    res = self.server.setFileStatusForTransformation( transName, status, [lfn] )
    if not res['OK']:
      print "Failed to update file status: %s" % res['Message']
    else:
      print "Updated file status to %s" % status

  def do_resetFile( self, args ):
    """Reset file status for the given transformation

    usage: resetFile <transName|ID> <lfn>
    """
    argss = string.split( args )
    if not len( argss ) > 1:
      print "transformation and file(s) not supplied"
      return
    transName = argss[0]
    lfns = argss[1:]
    res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns )
    if not res['OK']:
      print "Failed to reset file status: %s" % res['Message']
    else:
      if res['Value']['Failed']:
        print "Could not reset some files: "
        for lfn, reason in res['Value']['Failed'].items():
          print lfn, reason
      else:
        print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns )

  def do_resetProcessedFile( self, args ):
    """ Reset file status for the given transformation
        usage: resetFile <transName|ID> <lfn>
    """
    argss = string.split( args )
    if not len( argss ) > 1:
      print "transformation and file(s) not supplied"
      return
    transName = argss[0]
    lfns = argss[1:]
    res = self.server.setFileStatusForTransformation( transName, 'Unused', lfns, force = True )
    if not res['OK']:
      print "Failed to reset file status: %s" % res['Message']
    else:
      if res['Value']['Failed']:
        print "Could not reset some files: "
        for lfn, reason in res['Value']['Failed'].items():
          print lfn, reason
      else:
        print "Updated file statuses to 'Unused' for %d file(s)" % len( lfns )

  ####################################################################
  #
  # These are the methods for file manipulation
  #

  def do_addDirectory( self, args ):
    """Add files from the given catalog directory

    usage: addDirectory <directory> [directory]
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no directory supplied"
      return
    for directory in argss:
      res = self.server.addDirectory( directory, force = True )
      if not res['OK']:
        print 'failed to add directory %s: %s' % ( directory, res['Message'] )
      else:
        print 'added %s files for %s' % ( res['Value'], directory )

  def do_replicas( self, args ):
    """ Get replicas for <path>

        usage: replicas <lfn> [lfn]
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no files supplied"
      return
    res = self.server.getReplicas( argss )
    if not res['OK']:
      print "failed to get any replica information: %s" % res['Message']
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to get replica information for %s: %s" % ( lfn, error )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      ses = sortList( res['Value']['Successful'][lfn].keys() )
      outStr = "%s :" % lfn.ljust( 100 )
      for se in ses:
        outStr = "%s %s" % ( outStr, se.ljust( 15 ) )
      print outStr

  def do_addFile( self, args ):
    """Add new files to transformation DB

    usage: addFile <lfn> [lfn]
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no files supplied"
      return
    lfnDict = {}
    for lfn in argss:
      lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':'IGNORED-SE', 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.addFile( lfnDict, force = True )
    if not res['OK']:
      print "failed to add any files: %s" % res['Message']
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to add %s: %s" % ( lfn, error )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      print "added %s" % lfn

  def do_removeFile( self, args ):
    """Remove file from transformation DB

    usage: removeFile <lfn> [lfn]
    """
    argss = string.split( args )
    if not len( argss ) > 0:
      print "no files supplied"
      return
    res = self.server.removeFile( argss )
    if not res['OK']:
      print "failed to remove any files: %s" % res['Message']
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to remove %s: %s" % ( lfn, error )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      print "removed %s" % lfn

  def do_addReplica( self, args ):
    """ Add new replica to the transformation DB

    usage: addReplica <lfn> <se>
    """
    argss = string.split( args )
    if not len( argss ) == 2:
      print "no file info supplied"
      return
    lfn = argss[0]
    se = argss[1]
    lfnDict = {}
    lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.addReplica( lfnDict, force = True )
    if not res['OK']:
      print "failed to add replica: %s" % res['Message']
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to add replica: %s" % ( error )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      print "added %s" % lfn

  def do_removeReplica( self, args ):
    """Remove replica from the transformation DB

    usage: removeReplica <lfn> <se>
    """
    argss = string.split( args )
    if not len( argss ) == 2:
      print "no file info supplied"
      return
    lfn = argss[0]
    se = argss[1]
    lfnDict = {}
    lfnDict[lfn] = {'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.removeReplica( lfnDict )
    if not res['OK']:
      print "failed to remove replica: %s" % res['Message']
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to remove replica: %s" % ( error )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      print "removed %s" % lfn

  def do_setReplicaStatus( self, args ):
    """Set replica status, usually used to mark a replica Problematic

    usage: setReplicaStatus <lfn> <status> <se>
    """
    argss = string.split( args )
    if not len( argss ) > 2:
      print "no file info supplied"
      return
    lfn = argss[0]
    status = argss[1]
    se = argss[2]
    lfnDict = {}
    lfnDict[lfn] = {'Status':status, 'PFN':'IGNORED-PFN', 'SE':se, 'Size':0, 'GUID':'IGNORED-GUID', 'Checksum':'IGNORED-CHECKSUM'}
    res = self.server.setReplicaStatus( lfnDict )
    if not res['OK']:
      print "failed to set replica status: %s" % res['Message']
      return
    for lfn in sortList( res['Value']['Failed'].keys() ):
      error = res['Value']['Failed'][lfn]
      print "failed to set replica status: %s" % ( error )
    for lfn in sortList( res['Value']['Successful'].keys() ):
      print "updated replica status %s" % lfn
Exemplo n.º 33
0
class TransformationAgent( AgentModule ):

  def initialize( self ):
    self.pluginLocation = self.am_getOption( 'PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin' )
    self.checkCatalog = self.am_getOption( 'CheckCatalog', 'yes' )

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/ProductionManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    self.transDB = TransformationClient( 'TransformationDB' )
    self.rm = ReplicaManager()
    return S_OK()

  def execute( self ):
    # Get the transformations to process
    res = self.getTransformations()
    if not res['OK']:
      gLogger.info( "%s.execute: Failed to obtain transformations: %s" % ( AGENT_NAME, res['Message'] ) )
      return S_OK()
    # Process the transformations
    for transDict in res['Value']:
      transID = long( transDict['TransformationID'] )
      gLogger.info( "%s.execute: Processing transformation %s." % ( AGENT_NAME, transID ) )
      startTime = time.time()
      res = self.processTransformation( transDict )
      if not res['OK']:
        gLogger.info( "%s.execute: Failed to process transformation: %s" % ( AGENT_NAME, res['Message'] ) )
      else:
        gLogger.info( "%s.execute: Processed transformation in %.1f seconds" % ( AGENT_NAME, time.time() - startTime ) )
    return S_OK()

  def getTransformations( self ):
    # Obtain the transformations to be executed
    transName = self.am_getOption( 'Transformation', 'All' )
    if transName == 'All':
      gLogger.info( "%s.getTransformations: Initializing general purpose agent." % AGENT_NAME )
      res = self.transDB.getTransformations( {'Status':['Active', 'Completing', 'Flush']}, extraParams = True )
      if not res['OK']:
        gLogger.error( "%s.getTransformations: Failed to get transformations." % AGENT_NAME, res['Message'] )
        return res
      transformations = res['Value']
      gLogger.info( "%s.getTransformations: Obtained %d transformations to process" % ( AGENT_NAME, len( transformations ) ) )
    else:
      gLogger.info( "%s.getTransformations: Initializing for transformation %s." % ( AGENT_NAME, transName ) )
      res = self.transDB.getTransformation( transName, extraParams = True )
      if not res['OK']:
        gLogger.error( "%s.getTransformations: Failed to get transformation." % AGENT_NAME, res['Message'] )
        return res
      transformations = [res['Value']]
    return S_OK( transformations )

  def processTransformation( self, transDict ):
    transID = transDict['TransformationID']
    # First get the LFNs associated to the transformation
    res = self.transDB.getTransformationFiles( condDict = {'TransformationID':transID, 'Status':'Unused'} )
    if not res['OK']:
      gLogger.error( "%s.processTransformation: Failed to obtain input data." % AGENT_NAME, res['Message'] )
      return res
    transFiles = res['Value']
    lfns = res['LFNs']
    if not lfns:
      gLogger.info( "%s.processTransformation: No 'Unused' files found for transformation." % AGENT_NAME )
      if transDict['Status'] == 'Flush':
        res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' )
        if not res['OK']:
          gLogger.error( "%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME, res['Message'] )
        else:
          gLogger.info( "%s.execute: Updated transformation status to 'Active'." % AGENT_NAME )
      return S_OK()

    # Check the data is available with replicas
    res = self.__getDataReplicas( transID, lfns, active = ( transDict['Type'].lower() not in ["replication", "removal"] ) )
    if not res['OK']:
      gLogger.error( "%s.processTransformation: Failed to get data replicas" % AGENT_NAME, res['Message'] )
      return res
    dataReplicas = res['Value']

    # Get the plug-in type and create the plug-in object
    plugin = 'Standard'
    if transDict.has_key( 'Plugin' ) and transDict['Plugin']:
      plugin = transDict['Plugin']
    gLogger.info( "%s.processTransformation: Processing transformation with '%s' plug-in." % ( AGENT_NAME, plugin ) )
    res = self.__generatePluginObject( plugin )
    if not res['OK']:
      return res
    oPlugin = res['Value']

    # Get the plug-in and set the required params
    oPlugin.setParameters( transDict )
    oPlugin.setInputData( dataReplicas )
    oPlugin.setTransformationFiles( transFiles )
    res = oPlugin.generateTasks()
    if not res['OK']:
      gLogger.error( "%s.processTransformation: Failed to generate tasks for transformation." % AGENT_NAME, res['Message'] )
      return res
    tasks = res['Value']
    # Create the tasks
    allCreated = True
    created = 0
    for se, lfns in tasks:
      res = self.transDB.addTaskForTransformation( transID, lfns, se )
      if not res['OK']:
        gLogger.error( "%s.processTransformation: Failed to add task generated by plug-in." % AGENT_NAME, res['Message'] )
        allCreated = False
      else:
        created += 1
    if created:
      gLogger.info( "%s.processTransformation: Successfully created %d tasks for transformation." % ( AGENT_NAME, created ) )

    # If this production is to Flush
    if transDict['Status'] == 'Flush' and allCreated:
      res = self.transDB.setTransformationParameter( transID, 'Status', 'Active' )
      if not res['OK']:
        gLogger.error( "%s.execute: Failed to update transformation status to 'Active'." % AGENT_NAME, res['Message'] )
      else:
        gLogger.info( "%s.execute: Updated transformation status to 'Active'." % AGENT_NAME )
    return S_OK()

  ######################################################################
  #
  # Internal methods used by the agent
  #

  def __generatePluginObject( self, plugin ):
    """ This simply instantiates the TransformationPlugin class with the relevant plugin name
    """
    try:
      plugModule = __import__( self.pluginLocation, globals(), locals(), ['TransformationPlugin'] )
    except Exception, x:
      gLogger.exception( "%s.__generatePluginObject: Failed to import 'TransformationPlugin'" % AGENT_NAME, '', x )
      return S_ERROR()
    try:
      evalString = "plugModule.TransformationPlugin('%s')" % plugin
      return S_OK( eval( evalString ) )
    except Exception, x:
      gLogger.exception( "%s.__generatePluginObject: Failed to create %s()." % ( AGENT_NAME, plugin ), '', x )
      return S_ERROR()