Example #1
class DataIntegrityClient(Client):
    """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """
    def __init__(self, **kwargs):
        Client.__init__(self, **kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    ##########################################################################
    #
    # This section contains the specific methods for LFC->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if isinstance(lfnDir, basestring):
            lfnDir = [lfnDir]
        res = self.__getCatalogDirectoryContents(lfnDir)
        if not res['OK']:
            return res
        replicas = res['Value']['Replicas']
        catalogMetadata = res['Value']['Metadata']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def catalogFileToSE(self, lfns):
        """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if isinstance(lfns, basestring):
            lfns = [lfns]
        res = self.__getCatalogMetadata(lfns)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        res = self.__getCatalogReplicas(catalogMetadata.keys())
        if not res['OK']:
            return res
        replicas = res['Value']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
        """This takes the supplied replica and metadata information obtained
        from the catalog and checks it against the storage elements.
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        return self.__checkPhysicalFiles(replicas, catalogMetadata, ses=ses)

    def __checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
        """This obtains the physical file metadata and checks it against the
        catalog entries.
        """
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, _url in replicaDict.items():
                if (ses) and (se not in ses):
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info('%s %s' %
                     ('Storage Element'.ljust(20), 'Replicas'.rjust(20)))

        for se in sorted(seLfns):
            files = len(seLfns[se])
            gLogger.info('%s %s' % (se.ljust(20), str(files).rjust(20)))

            lfns = seLfns[se]
            sizeMismatch = []
            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res['OK']:
                gLogger.error('Failed to get physical file metadata.',
                              res['Message'])
                return res
            for lfn, metadata in res['Value'].items():
                if lfn in catalogMetadata:
                    if (metadata['Size'] != catalogMetadata[lfn]['Size']) and (
                            metadata['Size'] != 0):
                        sizeMismatch.append((lfn, 'deprecatedUrl', se,
                                             'CatalogPFNSizeMismatch'))
            if sizeMismatch:
                self.__reportProblematicReplicas(sizeMismatch, se,
                                                 'CatalogPFNSizeMismatch')
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """Obtain the physical file metadata and check that the files are available."""
        gLogger.info('Checking the integrity of %s physical files at %s' %
                     (len(lfns), se))

        res = StorageElement(se).getFileMetadata(lfns)

        if not res['OK']:
            gLogger.error('Failed to get metadata for lfns.', res['Message'])
            return res
        lfnMetadataDict = res['Value']['Successful']
        # If the replicas are completely missing
        missingReplicas = []
        for lfn, reason in res['Value']['Failed'].items():
            if re.search('File does not exist', reason):
                missingReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNMissing'))
        if missingReplicas:
            self.__reportProblematicReplicas(missingReplicas, se, 'PFNMissing')
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, lfnMetadata in lfnMetadataDict.items():
            if lfnMetadata['Lost']:
                lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost'))
            if lfnMetadata['Unavailable']:
                unavailableReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNUnavailable'))
            if lfnMetadata['Size'] == 0:
                zeroSizeReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNZeroSize'))
        if lostReplicas:
            self.__reportProblematicReplicas(lostReplicas, se, 'PFNLost')
        if unavailableReplicas:
            self.__reportProblematicReplicas(unavailableReplicas, se,
                                             'PFNUnavailable')
        if zeroSizeReplicas:
            self.__reportProblematicReplicas(zeroSizeReplicas, se,
                                             'PFNZeroSize')
        gLogger.info(
            'Checking the integrity of physical files at %s complete' % se)
        return S_OK(lfnMetadataDict)

    ##########################################################################
    #
    # This section contains the specific methods for SE->LFC checks
    #

    def storageDirectoryToCatalog(self, lfnDir, storageElement):
        """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the SE->LFC check at %s" % storageElement)
        gLogger.info("-" * 40)
        if type(lfnDir) in types.StringTypes:
            lfnDir = [lfnDir]
        res = self.__getStorageDirectoryContents(lfnDir, storageElement)
        if not res['OK']:
            return res
        storageFileMetadata = res['Value']
        if storageFileMetadata:
            return self.__checkCatalogForSEFiles(storageFileMetadata,
                                                 storageElement)
        return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}})

    def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
        gLogger.info('Checking %s storage files exist in the catalog' %
                     len(storageMetadata))

        res = self.fc.getReplicas(storageMetadata)
        if not res['OK']:
            gLogger.error("Failed to get replicas for LFN", res['Message'])
            return res
        failedLfns = res['Value']['Failed']
        successfulLfns = res['Value']['Successful']
        notRegisteredLfns = []

        for lfn in storageMetadata:
            if lfn in failedLfns:
                if 'No such file or directory' in failedLfns[lfn]:
                    notRegisteredLfns.append(
                        (lfn, 'deprecatedUrl', storageElement,
                         'LFNNotRegistered'))
                    failedLfns.pop(lfn)
            elif storageElement not in successfulLfns[lfn]:
                notRegisteredLfns.append(
                    (lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))

        if notRegisteredLfns:
            self.__reportProblematicReplicas(notRegisteredLfns, storageElement,
                                             'LFNNotRegistered')
        if failedLfns:
            return S_ERROR('Failed to obtain replicas')

        # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
        res = self.__getCatalogMetadata(storageMetadata)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        sizeMismatch = []
        for lfn, lfnCatalogMetadata in catalogMetadata.items():
            lfnStorageMetadata = storageMetadata[lfn]
            if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and (
                    lfnStorageMetadata['Size'] != 0):
                sizeMismatch.append((lfn, 'deprecatedUrl', storageElement,
                                     'CatalogPFNSizeMismatch'))
        if sizeMismatch:
            self.__reportProblematicReplicas(sizeMismatch, storageElement,
                                             'CatalogPFNSizeMismatch')
        gLogger.info('Checking storage files exist in the catalog complete')
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'StorageMetadata': storageMetadata
        }
        return S_OK(resDict)

    def getStorageDirectoryContents(self, lfnDir, storageElement):
        """This takes the supplied LFN directories and recursively obtains the
        files in the supplied storage element.
        """
        return self.__getStorageDirectoryContents(lfnDir, storageElement)

    def __getStorageDirectoryContents(self, lfnDir, storageElement):
        """Obtains the contents of the supplied directory on the storage."""
        gLogger.info('Obtaining the contents for %s directories at %s' %
                     (len(lfnDir), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnDir)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of directories",
                          res['Message'])
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of directory',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of directory')
        directoryExists = res['Value']['Successful']
        activeDirs = []
        for directory in sorted(directoryExists):
            exists = directoryExists[directory]
            if exists:
                activeDirs.append(directory)
        allFiles = {}
        while activeDirs:
            currentDir = activeDirs[0]
            res = se.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
                return S_ERROR(res['Value']['Failed'][currentDir])
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(
                    se.getLFNFromURL(dirContents['SubDirs']).get(
                        'Value', {}).get('Successful', []))
                fileURLMetadata = dirContents['Files']
                fileMetadata = {}
                res = se.getLFNFromURL(fileURLMetadata)
                if not res['OK']:
                    gLogger.error('Failed to get directory content LFNs',
                                  res['Message'])
                    return res

                for url, error in res['Value']['Failed'].items():
                    gLogger.error("Failed to get LFN for URL",
                                  "%s %s" % (url, error))
                if res['Value']['Failed']:
                    return S_ERROR("Failed to get LFNs for PFNs")
                urlLfns = res['Value']['Successful']
                for urlLfn, lfn in urlLfns.items():
                    fileMetadata[lfn] = fileURLMetadata[urlLfn]
                allFiles.update(fileMetadata)

        zeroSizeFiles = []

        for lfn in sorted(allFiles):
            if os.path.basename(lfn) == 'dirac_directory':
                allFiles.pop(lfn)
            else:
                metadata = allFiles[lfn]
                if metadata['Size'] == 0:
                    zeroSizeFiles.append(
                        (lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize'))
        if zeroSizeFiles:
            self.__reportProblematicReplicas(zeroSizeFiles, storageElement,
                                             'PFNZeroSize')

        gLogger.info('Obtained a total of %s files for directories at %s' %
                     (len(allFiles), storageElement))
        return S_OK(allFiles)

    def __getStoragePathExists(self, lfnPaths, storageElement):
        gLogger.info('Determining the existence of %d files at %s' %
                     (len(lfnPaths), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnPaths)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of paths",
                          res['Message'])
            return res
        for lfnPath, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of path',
                          '%s %s' % (lfnPath, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of paths')
        pathExists = res['Value']['Successful']
        resDict = {}
        for lfn, exists in pathExists.items():
            if exists:
                resDict[lfn] = True
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the specific methods for obtaining replica and metadata information from the catalog
    #

    def __getCatalogDirectoryContents(self, lfnDir):
        """ Obtain the contents of the supplied directory
    """
        gLogger.info('Obtaining the catalog contents for %s directories' %
                     len(lfnDir))

        activeDirs = lfnDir
        allFiles = {}
        while activeDirs:
            currentDir = activeDirs[0]
            res = self.fc.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])

        zeroReplicaFiles = []
        zeroSizeFiles = []
        allReplicaDict = {}
        allMetadataDict = {}
        for lfn, lfnDict in allFiles.items():
            lfnReplicas = {}
            for se, replicaDict in lfnDict['Replicas'].items():
                lfnReplicas[se] = replicaDict['PFN']
            if not lfnReplicas:
                zeroReplicaFiles.append(lfn)
            allReplicaDict[lfn] = lfnReplicas
            allMetadataDict[lfn] = lfnDict['MetaData']
            if lfnDict['MetaData']['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info(
            'Obtained a total of %s files for the supplied directories' %
            len(allMetadataDict))
        resDict = {'Metadata': allMetadataDict, 'Replicas': allReplicaDict}
        return S_OK(resDict)

    def __getCatalogReplicas(self, lfns):
        """ Obtain the file replicas from the catalog while checking that there are replicas
    """
        gLogger.info('Obtaining the replicas for %s files' % len(lfns))

        zeroReplicaFiles = []
        res = self.fc.getReplicas(lfns, allStatus=True)
        if not res['OK']:
            gLogger.error('Failed to get catalog replicas', res['Message'])
            return res
        allReplicas = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('File has zero replicas', error):
                zeroReplicaFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        gLogger.info('Obtaining the replicas for files complete')
        return S_OK(allReplicas)

    def __getCatalogMetadata(self, lfns):
        """ Obtain the file metadata from the catalog while checking they exist
    """
        if not lfns:
            return S_OK({})
        gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns))

        missingCatalogFiles = []
        zeroSizeFiles = []
        res = self.fc.getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get catalog metadata', res['Message'])
            return res
        allMetadata = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('No such file or directory', error):
                missingCatalogFiles.append(lfn)
        if missingCatalogFiles:
            self.__reportProblematicFiles(missingCatalogFiles,
                                          'LFNCatalogMissing')
        for lfn, metadata in allMetadata.items():
            if metadata['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info('Obtaining the catalog metadata complete')
        return S_OK(allMetadata)

    ##########################################################################
    #
    # This section contains the methods for inserting problematic files into the integrity DB
    #

    def __reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sorted(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(lfn, list):
            lfns = lfn
        elif isinstance(lfn, basestring):
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setFileProblematic: Failed to insert problematics to integrity DB"
            )
        return res

    def __reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, _se, _reason in sorted(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn :{'PFN':pfn,'SE':se,'Prognosis':prognosis}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if type(replicaTuple) == types.TupleType:
            replicaTuple = [replicaTuple]
        elif type(replicaTuple) == types.ListType:
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict:
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res


#   def __getRegisteredPFNLFN( self, pfn, storageElement ):
#
#     res = StorageElement( storageElement ).getURL( pfn )
#     if not res['OK']:
#       gLogger.error( "Failed to get registered PFN for physical files", res['Message'] )
#       return res
#     for pfn, error in res['Value']['Failed'].items():
#       gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
#       return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
#     registeredPFN = res['Value']['Successful'][pfn]
#     res = returnSingleResult( self.fc.getLFNForPFN( registeredPFN ) )
#     if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ):
#       return S_OK( False )
#     return S_OK( res['Value'] )

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if (catalogSize == bookkeepingSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since it is the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) found all sizes to mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)

    def resolvePFNNotRegistered(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." %
                         fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count"
                % fileID)
            return self.incrementProblematicRetry(fileID)

        # HACK until we can obtain the space token descriptions through GFAL
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site

        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)

        problematicDict['PFN'] = res['Value']

        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    def resolveLFNCatalogMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    def resolvePFNMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" %
                         fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)

        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" %
                         fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        seSite = se.split('_')[0].split('-')[0]
        found = False
        gLogger.debug('Registered replicas: %s' % replicas)
        for replicaSE in replicas.keys():
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info(
                "PFNMissing replica (%d) is no longer registered at SE. Resolved."
                % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info(
            "PFNMissing replica (%d) does not exist. Removing from catalog..."
            % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info(
                "PFNMissing replica (%d) had a single replica. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)

    def resolvePFNUnavailable(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            # The file is no longer Unavailable but has now disappeared completely
            gLogger.info(
                "PFNUnavailable replica (%d) found to be missing. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info(
                "PFNUnavailable replica (%d) found to still be Unavailable" %
                fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info(
                "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" %
                     fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)

    def resolvePFNZeroSize(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)

        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info(
                "PFNZeroSize replica (%d) found to be missing. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))

            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info(
                "PFNZeroSize replica (%d) removed. Updating prognosis" %
                problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')

        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info(
                "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info(
                "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)

    ############################################################################################

    def resolveLFNZeroReplicas(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" %
                         fileID)
        else:
            gLogger.info(
                "LFNZeroReplicas file (%d) does not have replicas. Checking storage..."
                % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue(
                        'Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if not res['OK']:
                    gLogger.error(
                        'Failed to determine storage path existence',
                        res['Message'])
                    continue
                if lfn in res['Value']:
                    gLogger.info(
                        "LFNZeroReplicas file (%d) found storage file at %s" %
                        (fileID, storageElementName))
                    self.__reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName,
                          'PFNNotRegistered')], storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info(
                    "LFNZeroReplicas file (%d) did not have storage files. Removing..."
                    % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file',
                                  res['Message'])
                    # Increment the number of retries for this file
                    self.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" %
                             fileID)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)
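
A minimal usage sketch for the client above, assuming an initialised DIRAC environment; the LFN and the script bootstrap are illustrative, not part of the original example:

# Hypothetical driver (assumes a configured DIRAC installation)
from DIRAC.Core.Base import Script
Script.parseCommandLine()  # bootstrap the DIRAC runtime before any client call

from DIRAC import gLogger

client = DataIntegrityClient()
# Run the LFC->SE consistency check for a single (made-up) LFN
res = client.catalogFileToSE(['/lhcb/user/s/someuser/example.dst'])
if not res['OK']:
    gLogger.error('LFC->SE check failed', res['Message'])
else:
    # On success the result carries the catalog metadata and replicas that were checked
    gLogger.info('Checked %s replicas' % len(res['Value']['CatalogReplicas']))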
Example #2
class CatalogPlugInTestCase(unittest.TestCase):
    """Base class for the CatalogPlugin test case"""
    def setUp(self):
        self.fullMetadata = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        pfn = "protocol://host:port/storage/path%s" % lfn
        size = 10000000
        se = "DIRAC-storage"
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {
            "PFN": pfn,
            "Size": size,
            "SE": se,
            "GUID": guid,
            "Checksum": adler
        }
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Successful"])
        self.assertTrue(path in res["Value"]["Successful"])
        return res["Value"]["Successful"][path]

    def parseError(self, res, path):
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Failed"])
        self.assertTrue(path in res["Value"]["Failed"])
        return res["Value"]["Failed"][path]

    def cleanDirectory(self):
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = list(returnValue["Files"])
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {"PFN": pfn, "SE": se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
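
One plausible way to drive this test case: setUp reads catalogClientToTest at module scope, so it must be defined before the suite runs. The catalog name below is an assumption, not part of the original example:

# Hypothetical runner; "FileCatalogDB" is a placeholder for the plug-in under test
if __name__ == "__main__":
    catalogClientToTest = "FileCatalogDB"  # assumption: replace with the real catalog name
    suite = unittest.defaultTestLoader.loadTestsFromTestCase(CatalogPlugInTestCase)
    unittest.TextTestRunner(verbosity=2).run(suite)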
Example #3
class CatalogPlugInTestCase(unittest.TestCase):
    """ Base class for the CatalogPlugin test case """
    def setUp(self):
        self.fullMetadata = [
            'Status', 'ChecksumType', 'OwnerRole', 'CreationDate', 'Checksum',
            'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size'
        ]
        self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
        self.fileMetadata = self.fullMetadata + ['NumberOfLinks']

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in xrange(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        pfn = 'protocol://host:port/storage/path%s' % lfn
        size = 10000000
        se = 'DIRAC-storage'
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {
            'PFN': pfn,
            'Size': size,
            'SE': se,
            'GUID': guid,
            'Checksum': adler
        }
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Successful'])
        self.assertTrue(path in res['Value']['Successful'])
        return res['Value']['Successful'][path]

    def parseError(self, res, path):
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Failed'])
        self.assertTrue(path in res['Value']['Failed'])
        return res['Value']['Failed'][path]

    def cleanDirectory(self):
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = returnValue['Files'].keys()
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {'PFN': pfn, 'SE': se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
Example #4

class RequestPreparationAgent( AgentModule ):

  def initialize( self ):
    self.fileCatalog = FileCatalog()
    self.dm = DataManager()
    self.stagerClient = StorageManagerClient()
    self.dataIntegrityClient = DataIntegrityClient()
    # This sets the Default Proxy to be used, as defined under
    # /Operations/Shifter/DataManager.
    # The shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):
    """ This is the first logical task to be executed and manages the New->Waiting transition of the Replicas
    """
    res = self.__getNewReplicas()
    if not res['OK']:
      gLogger.fatal( "RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.", res['Message'] )
      return res
    if not res['Value']:
      gLogger.info( "There were no New replicas found" )
      return res
    replicas = res['Value']['Replicas']
    replicaIDs = res['Value']['ReplicaIDs']
    gLogger.info( "RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation." % len( replicaIDs ) )

    # Check if the files exist in the FileCatalog
    res = self.__getExistingFiles( replicas )
    if not res['OK']:
      return res
    exist = res['Value']['Exist']
    terminal = res['Value']['Missing']
    failed = res['Value']['Failed']
    if not exist:
      gLogger.error( 'RequestPreparation.prepareNewReplicas: Failed to determine the existence of any file' )
      return S_OK()
    terminalReplicaIDs = {}
    for lfn, reason in terminal.items():
      for replicaID in replicas[lfn].values():
        terminalReplicaIDs[replicaID] = reason
      replicas.pop( lfn )
    gLogger.info( "RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog." % len( exist ) )
    if terminal:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog." % len( terminal ) )

    # Obtain the file sizes from the FileCatalog
    res = self.__getFileSize( exist )
    if not res['OK']:
      return res
    failed.update( res['Value']['Failed'] )
    terminal = res['Value']['ZeroSize']
    fileSizes = res['Value']['FileSizes']
    if not fileSizes:
      gLogger.error( 'RequestPreparation.prepareNewReplicas: Failed to determine the size of any file' )
      return S_OK()
    for lfn, reason in terminal.items():
      for _se, replicaID in replicas[lfn].items():
        terminalReplicaIDs[replicaID] = reason
      replicas.pop( lfn )
    gLogger.info( "RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog." % len( fileSizes ) )
    if terminal:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog." % len( terminal ) )

    # Obtain the replicas from the FileCatalog
    res = self.__getFileReplicas( fileSizes.keys() )
    if not res['OK']:
      return res
    failed.update( res['Value']['Failed'] )
    terminal = res['Value']['ZeroReplicas']
    fileReplicas = res['Value']['Replicas']
    if not fileReplicas:
      gLogger.error( 'RequestPreparation.prepareNewReplicas: Failed to determine replicas for any file' )
      return S_OK()
    for lfn, reason in terminal.items():
      for _se, replicaID in replicas[lfn].items():
        terminalReplicaIDs[replicaID] = reason
      replicas.pop( lfn )
    gLogger.info( "RequestPreparation.prepareNewReplicas: Obtained replica information for %s file from the FileCatalog." % len( fileReplicas ) )
    if terminal:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog." % len( terminal ) )

    # Check the replicas exist at the requested site
    replicaMetadata = []
    for lfn, requestedSEs in replicas.items():
      lfnReplicas = fileReplicas.get( lfn )

      # This should not happen in principle, but it was seen
      # after a corrupted staging request has entered the DB
      if not lfnReplicas:
        gLogger.error( "Missing replicas information", "%s %s" % ( lfn, requestedSEs ) )
        continue

      for requestedSE, replicaID in requestedSEs.items():
        if requestedSE not in lfnReplicas:
          terminalReplicaIDs[replicaID] = "LFN not registered at requested SE"
          replicas[lfn].pop( requestedSE )
        else:
          replicaMetadata.append( ( replicaID, lfnReplicas[requestedSE], fileSizes[lfn] ) )

    # Update the states of the files in the database
    if terminalReplicaIDs:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s replicas are terminally failed." % len( terminalReplicaIDs ) )
      res = self.stagerClient.updateReplicaFailure( terminalReplicaIDs )
      if not res['OK']:
        gLogger.error( "RequestPreparation.prepareNewReplicas: Failed to update replica failures.", res['Message'] )
    if replicaMetadata:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s replica metadata to be updated." % len( replicaMetadata ) )
      # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
      res = self.stagerClient.updateReplicaInformation( replicaMetadata )
      if not res['OK']:
        gLogger.error( "RequestPreparation.prepareNewReplicas: Failed to update replica metadata.", res['Message'] )
    return S_OK()

  def __getNewReplicas( self ):
    """ This obtains the New replicas from the Replicas table and for each LFN the requested storage element """
    # First obtain the New replicas from the CacheReplicas table
    res = self.stagerClient.getCacheReplicas( {'Status':'New'} )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getNewReplicas: Failed to get replicas with New status.", res['Message'] )
      return res
    if not res['Value']:
      gLogger.debug( "RequestPreparation.__getNewReplicas: No New replicas found to process." )
      return S_OK()
    else:
      gLogger.debug( "RequestPreparation.__getNewReplicas: Obtained %s New replicas(s) to process." % len( res['Value'] ) )
    replicas = {}
    replicaIDs = {}
    for replicaID, info in res['Value'].items():
      lfn = info['LFN']
      storageElement = info['SE']
      replicas.setdefault( lfn, {} )[storageElement] = replicaID
      replicaIDs[replicaID] = ( lfn, storageElement )
    return S_OK( {'Replicas':replicas, 'ReplicaIDs':replicaIDs} )

  def __getExistingFiles( self, lfns ):
    """ This checks that the files exist in the FileCatalog. """
    res = self.fileCatalog.exists( list( set( lfns ) ) )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getExistingFiles: Failed to determine whether files exist.", res['Message'] )
      return res
    failed = res['Value']['Failed']
    success = res['Value']['Successful']
    exist = [lfn for lfn, exists in success.items() if exists]
    missing = list( set( success ) - set( exist ) )
    if missing:
      reason = 'LFN not registered in the FC'
      gLogger.warn( "RequestPreparation.__getExistingFiles: %s" % reason, '\n'.join( [''] + missing ) )
      self.__reportProblematicFiles( missing, 'LFN-LFC-DoesntExist' )
      missing = dict.fromkeys( missing, reason )
    else:
      missing = {}
    return S_OK( {'Exist':exist, 'Missing':missing, 'Failed':failed} )

  def __getFileSize( self, lfns ):
    """ This obtains the file size from the FileCatalog. """
    fileSizes = {}
    zeroSize = {}
    res = self.fileCatalog.getFileSize( lfns )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getFileSize: Failed to get sizes for files.", res['Message'] )
      return res
    failed = res['Value']['Failed']
    for lfn, size in res['Value']['Successful'].items():
      if size == 0:
        zeroSize[lfn] = "LFN registered with zero size in the FileCatalog"
      else:
        fileSizes[lfn] = size
    if zeroSize:
      for lfn, reason in zeroSize.items():
        gLogger.warn( "RequestPreparation.__getFileSize: %s" % reason, lfn )
      self.__reportProblematicFiles( zeroSize.keys(), 'LFN-LFC-ZeroSize' )
    return S_OK( {'FileSizes':fileSizes, 'ZeroSize':zeroSize, 'Failed':failed} )

  def __getFileReplicas( self, lfns ):
    """ This obtains the replicas from the FileCatalog. """
    replicas = {}
    noReplicas = {}
    res = self.dm.getActiveReplicas( lfns )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getFileReplicas: Failed to obtain file replicas.", res['Message'] )
      return res
    failed = res['Value']['Failed']
    for lfn, lfnReplicas in res['Value']['Successful'].items():
      if not lfnReplicas:
        noReplicas[lfn] = "LFN registered with zero replicas in the FileCatalog"
      else:
        replicas[lfn] = lfnReplicas
    if noReplicas:
      for lfn, reason in noReplicas.items():
        gLogger.warn( "RequestPreparation.__getFileReplicas: %s" % reason, lfn )
      self.__reportProblematicFiles( noReplicas.keys(), 'LFN-LFC-NoReplicas' )
    return S_OK( {'Replicas':replicas, 'ZeroReplicas':noReplicas, 'Failed':failed} )

  def __reportProblematicFiles( self, lfns, reason ):
    # Reporting to the integrity DB is currently short-circuited; the code below is kept for reference
    return S_OK()
    res = self.dataIntegrityClient.setFileProblematic( lfns, reason, sourceComponent = 'RequestPreparationAgent' )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__reportProblematicFiles: Failed to report missing files.", res['Message'] )
      return res
    if res['Value']['Successful']:
      gLogger.info( "RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files." % len( res['Value']['Successful'] ) )
    if res['Value']['Failed']:
      gLogger.info( "RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files." % len( res['Value']['Failed'] ) )
    return res
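For reference, the nested structures returned by __getNewReplicas and walked by the preparation loop above can be shown with a small self-contained sketch; every LFN, storage element name and replica ID below is invented for illustration:

# Illustrative sketch of the structures handled above; all values are invented.
replicas = {
    '/lhcb/data/file1': {'CERN-RAW': 101, 'CNAF-RAW': 102},
    '/lhcb/data/file2': {'CERN-RAW': 103},
}
replicaIDs = {
    101: ('/lhcb/data/file1', 'CERN-RAW'),
    102: ('/lhcb/data/file1', 'CNAF-RAW'),
    103: ('/lhcb/data/file2', 'CERN-RAW'),
}

# The New->Waiting preparation walks LFN by LFN, then SE by SE:
for lfn, requestedSEs in replicas.items():
    for requestedSE, replicaID in requestedSEs.items():
        print(lfn, requestedSE, replicaID)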
Example No. 5
class RequestPreparationAgent(AgentModule):
    def initialize(self):
        self.fileCatalog = FileCatalog()
        self.dm = DataManager()
        self.stagerClient = StorageManagerClient()
        self.dataIntegrityClient = DataIntegrityClient()
        # This sets the default proxy to be used, as defined under
        # /Operations/Shifter/DataManager.
        # The shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption("shifterProxy", "DataManager")

        return S_OK()

    def execute(self):
        """This is the first logical task to be executed and manages the New->Waiting transition of the Replicas"""
        res = self.__getNewReplicas()
        if not res["OK"]:
            gLogger.fatal(
                "RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.", res["Message"]
            )
            return res
        if not res["Value"]:
            gLogger.info("There were no New replicas found")
            return res
        replicas = res["Value"]["Replicas"]
        replicaIDs = res["Value"]["ReplicaIDs"]
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation." % len(replicaIDs)
        )

        # Check if the files exist in the FileCatalog
        res = self.__getExistingFiles(replicas)
        if not res["OK"]:
            return res
        exist = res["Value"]["Exist"]
        terminal = res["Value"]["Missing"]
        failed = res["Value"]["Failed"]
        if not exist:
            gLogger.error("RequestPreparation.prepareNewReplicas: Failed to determine the existence of any file")
            return S_OK()
        terminalReplicaIDs = {}
        for lfn, reason in terminal.items():
            for replicaID in replicas[lfn].values():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info("RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog." % len(exist))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog." % len(terminal)
            )

        # Obtain the file sizes from the FileCatalog
        res = self.__getFileSize(exist)
        if not res["OK"]:
            return res
        failed.update(res["Value"]["Failed"])
        terminal = res["Value"]["ZeroSize"]
        fileSizes = res["Value"]["FileSizes"]
        if not fileSizes:
            gLogger.error("RequestPreparation.prepareNewReplicas: Failed determine sizes of any files")
            return S_OK()
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog." % len(fileSizes)
        )
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog."
                % len(terminal)
            )

        # Obtain the replicas from the FileCatalog
        res = self.__getFileReplicas(list(fileSizes))
        if not res["OK"]:
            return res
        failed.update(res["Value"]["Failed"])
        terminal = res["Value"]["ZeroReplicas"]
        fileReplicas = res["Value"]["Replicas"]
        if not fileReplicas:
            gLogger.error("RequestPreparation.prepareNewReplicas: Failed determine replicas for any files")
            return S_OK()
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained replica information for %s file from the FileCatalog."
            % len(fileReplicas)
        )
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog."
                % len(terminal)
            )

        # Check the replicas exist at the requested site
        replicaMetadata = []
        for lfn, requestedSEs in replicas.items():
            lfnReplicas = fileReplicas.get(lfn)

            # This should not happen in principle, but it was seen
            # after a corrupted staging request has entered the DB
            if not lfnReplicas:
                gLogger.error("Missing replicas information", "%s %s" % (lfn, requestedSEs))
                continue

            for requestedSE, replicaID in requestedSEs.items():
                if requestedSE not in lfnReplicas:
                    terminalReplicaIDs[replicaID] = "LFN not registered at requested SE"
                    replicas[lfn].pop(requestedSE)
                else:
                    replicaMetadata.append((replicaID, lfnReplicas[requestedSE], fileSizes[lfn]))

        # Update the states of the files in the database
        if terminalReplicaIDs:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replicas are terminally failed." % len(terminalReplicaIDs)
            )
            res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
            if not res["OK"]:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica failures.", res["Message"]
                )
        if replicaMetadata:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replica metadata to be updated." % len(replicaMetadata)
            )
            # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
            res = self.stagerClient.updateReplicaInformation(replicaMetadata)
            if not res["OK"]:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica metadata.", res["Message"]
                )
        return S_OK()

    def __getNewReplicas(self):
        """This obtains the New replicas from the Replicas table and for each LFN the requested storage element"""
        # First obtain the New replicas from the CacheReplicas table
        res = self.stagerClient.getCacheReplicas({"Status": "New"})
        if not res["OK"]:
            gLogger.error(
                "RequestPreparation.__getNewReplicas: Failed to get replicas with New status.", res["Message"]
            )
            return res
        if not res["Value"]:
            gLogger.debug("RequestPreparation.__getNewReplicas: No New replicas found to process.")
            return S_OK()
        else:
            gLogger.debug(
                "RequestPreparation.__getNewReplicas: Obtained %s New replicas(s) to process." % len(res["Value"])
            )
        replicas = {}
        replicaIDs = {}
        for replicaID, info in res["Value"].items():
            lfn = info["LFN"]
            storageElement = info["SE"]
            replicas.setdefault(lfn, {})[storageElement] = replicaID
            replicaIDs[replicaID] = (lfn, storageElement)
        return S_OK({"Replicas": replicas, "ReplicaIDs": replicaIDs})

    def __getExistingFiles(self, lfns):
        """This checks that the files exist in the FileCatalog."""
        res = self.fileCatalog.exists(list(set(lfns)))
        if not res["OK"]:
            gLogger.error(
                "RequestPreparation.__getExistingFiles: Failed to determine whether files exist.", res["Message"]
            )
            return res
        failed = res["Value"]["Failed"]
        success = res["Value"]["Successful"]
        exist = [lfn for lfn, exists in success.items() if exists]
        missing = list(set(success) - set(exist))
        if missing:
            reason = "LFN not registered in the FC"
            gLogger.warn("RequestPreparation.__getExistingFiles: %s" % reason, "\n".join([""] + missing))
            self.__reportProblematicFiles(missing, "LFN-LFC-DoesntExist")
            missing = dict.fromkeys(missing, reason)
        else:
            missing = {}
        return S_OK({"Exist": exist, "Missing": missing, "Failed": failed})

    def __getFileSize(self, lfns):
        """This obtains the file size from the FileCatalog."""
        fileSizes = {}
        zeroSize = {}
        res = self.fileCatalog.getFileSize(lfns)
        if not res["OK"]:
            gLogger.error("RequestPreparation.__getFileSize: Failed to get sizes for files.", res["Message"])
            return res
        failed = res["Value"]["Failed"]
        for lfn, size in res["Value"]["Successful"].items():
            if size == 0:
                zeroSize[lfn] = "LFN registered with zero size in the FileCatalog"
            else:
                fileSizes[lfn] = size
        if zeroSize:
            for lfn, reason in zeroSize.items():
                gLogger.warn("RequestPreparation.__getFileSize: %s" % reason, lfn)
            self.__reportProblematicFiles(zeroSize.keys(), "LFN-LFC-ZeroSize")
        return S_OK({"FileSizes": fileSizes, "ZeroSize": zeroSize, "Failed": failed})

    def __getFileReplicas(self, lfns):
        """This obtains the replicas from the FileCatalog."""
        replicas = {}
        noReplicas = {}
        res = self.dm.getActiveReplicas(lfns)
        if not res["OK"]:
            gLogger.error("RequestPreparation.__getFileReplicas: Failed to obtain file replicas.", res["Message"])
            return res
        failed = res["Value"]["Failed"]
        for lfn, lfnReplicas in res["Value"]["Successful"].items():
            if len(lfnReplicas) == 0:
                noReplicas[lfn] = "LFN registered with zero replicas in the FileCatalog"
            else:
                replicas[lfn] = lfnReplicas
        if noReplicas:
            for lfn, reason in noReplicas.items():
                gLogger.warn("RequestPreparation.__getFileReplicas: %s" % reason, lfn)
            self.__reportProblematicFiles(list(noReplicas), "LFN-LFC-NoReplicas")
        return S_OK({"Replicas": replicas, "ZeroReplicas": noReplicas, "Failed": failed})

    def __reportProblematicFiles(self, lfns, reason):
        # Reporting to the integrity DB is currently short-circuited; the code below is kept for reference
        return S_OK()
        res = self.dataIntegrityClient.setFileProblematic(lfns, reason, sourceComponent="RequestPreparationAgent")
        if not res["OK"]:
            gLogger.error(
                "RequestPreparation.__reportProblematicFiles: Failed to report missing files.", res["Message"]
            )
            return res
        if res["Value"]["Successful"]:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files."
                % len(res["Value"]["Successful"])
            )
        if res["Value"]["Failed"]:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files."
                % len(res["Value"]["Failed"])
            )
        return res
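Every call in the agent above follows DIRAC's return-value convention: a dict with an 'OK' flag carrying either 'Value' or 'Message', and bulk catalog operations further splitting 'Value' into 'Successful' and 'Failed' maps keyed by LFN. A minimal sketch of that convention, using simplified stand-ins for S_OK/S_ERROR rather than the real DIRAC implementations:

def S_OK(value=None):
    # Simplified stand-in for DIRAC's S_OK
    return {'OK': True, 'Value': value}

def S_ERROR(message):
    # Simplified stand-in for DIRAC's S_ERROR
    return {'OK': False, 'Message': message}

# A typical bulk-operation result and the checking pattern used above
res = S_OK({'Successful': {'/lhcb/data/file1': True},
            'Failed': {'/lhcb/data/file2': 'No such file'}})
if not res['OK']:
    raise RuntimeError(res['Message'])
for lfn, exists in res['Value']['Successful'].items():
    print(lfn, exists)
for lfn, errorReason in res['Value']['Failed'].items():
    print(lfn, errorReason)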
Example No. 6
class ValidateOutputDataAgent(AgentModule):
    def __init__(self, *args, **kwargs):
        """ c'tor
    """
        AgentModule.__init__(self, *args, **kwargs)

        self.consistencyInspector = ConsistencyInspector()
        self.integrityClient = DataIntegrityClient()
        self.fc = FileCatalog()
        self.transClient = TransformationClient()
        self.fileCatalogClient = FileCatalogClient()

        agentTSTypes = self.am_getOption('TransformationTypes', [])
        if agentTSTypes:
            self.transformationTypes = agentTSTypes
        else:
            self.transformationTypes = Operations().getValue(
                'Transformations/DataProcessing', ['MCSimulation', 'Merge'])

        self.directoryLocations = sorted(
            self.am_getOption('DirectoryLocations',
                              ['TransformationDB', 'MetadataCatalog']))
        self.transfidmeta = self.am_getOption('TransfIDMeta',
                                              "TransformationID")
        self.enableFlag = True

    #############################################################################

    def initialize(self):
        """ Sets defaults
    """

        # This sets the default proxy to be used, as defined under
        # /Operations/Shifter/DataManager.
        # The shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        gLogger.info("Will treat the following transformation types: %s" %
                     str(self.transformationTypes))
        gLogger.info(
            "Will search for directories in the following locations: %s" %
            str(self.directoryLocations))
        gLogger.info("Will use %s as metadata tag name for TransformationID" %
                     self.transfidmeta)
        return S_OK()

    #############################################################################

    def execute(self):
        """ The VerifyOutputData execution method
    """
        self.enableFlag = self.am_getOption('EnableFlag', 'True')
        if self.enableFlag != 'True':
            self.log.info(
                "VerifyOutputData is disabled by configuration option 'EnableFlag'"
            )
            return S_OK('Disabled via CS flag')

        gLogger.info("-" * 40)
        self.updateWaitingIntegrity()
        gLogger.info("-" * 40)

        res = self.transClient.getTransformations({
            'Status':
            'ValidatingOutput',
            'Type':
            self.transformationTypes
        })
        if not res['OK']:
            gLogger.error("Failed to get ValidatingOutput transformations",
                          res['Message'])
            return res
        transDicts = res['Value']
        if not transDicts:
            gLogger.info("No transformations found in ValidatingOutput status")
            return S_OK()
        gLogger.info("Found %s transformations in ValidatingOutput status" %
                     len(transDicts))
        for transDict in transDicts:
            transID = transDict['TransformationID']
            res = self.checkTransformationIntegrity(int(transID))
            if not res['OK']:
                gLogger.error(
                    "Failed to perform full integrity check for transformation %d"
                    % transID)
            else:
                self.finalizeCheck(transID)
                gLogger.info("-" * 40)
        return S_OK()

    def updateWaitingIntegrity(self):
        """ Get 'WaitingIntegrity' transformations, update to 'ValidatedOutput'
    """
        gLogger.info(
            "Looking for transformations in the WaitingIntegrity status to update"
        )
        res = self.transClient.getTransformations(
            {'Status': 'WaitingIntegrity'})
        if not res['OK']:
            gLogger.error("Failed to get WaitingIntegrity transformations",
                          res['Message'])
            return res
        transDicts = res['Value']
        if not transDicts:
            gLogger.info("No transformations found in WaitingIntegrity status")
            return S_OK()
        gLogger.info("Found %s transformations in WaitingIntegrity status" %
                     len(transDicts))
        for transDict in transDicts:
            transID = transDict['TransformationID']
            gLogger.info("-" * 40)
            res = self.integrityClient.getTransformationProblematics(
                int(transID))
            if not res['OK']:
                gLogger.error(
                    "Failed to determine waiting problematics for transformation",
                    res['Message'])
            elif not res['Value']:
                res = self.transClient.setTransformationParameter(
                    transID, 'Status', 'ValidatedOutput')
                if not res['OK']:
                    gLogger.error(
                        "Failed to update status of transformation %s to ValidatedOutput"
                        % (transID))
                else:
                    gLogger.info(
                        "Updated status of transformation %s to ValidatedOutput"
                        % (transID))
            else:
                gLogger.info(
                    "%d problematic files for transformation %s were found" %
                    (len(res['Value']), transID))
        return S_OK()

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """ Get the directories for the supplied transformation from the transformation system
    """
        directories = []
        if 'TransformationDB' in self.directoryLocations:
            res = self.transClient.getTransformationParameters(
                transID, ['OutputDirectories'])
            if not res['OK']:
                gLogger.error("Failed to obtain transformation directories",
                              res['Message'])
                return res
            if not isinstance(res['Value'], list):
                transDirectories = ast.literal_eval(res['Value'])
            else:
                transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if 'MetadataCatalog' in self.directoryLocations:
            res = self.fileCatalogClient.findDirectoriesByMetadata(
                {self.transfidmeta: transID})
            if not res['OK']:
                gLogger.error("Failed to obtain metadata catalog directories",
                              res['Message'])
                return res
            transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)
        if not directories:
            gLogger.info("No output directories found")
        directories = sorted(directories)
        return S_OK(directories)

    @staticmethod
    def _addDirs(transID, newDirs, existingDirs):
        for nDir in newDirs:
            transStr = str(transID).zfill(8)
            if re.search(transStr, nDir):
                if nDir not in existingDirs:
                    existingDirs.append(nDir)
        return existingDirs

    #############################################################################
    def checkTransformationIntegrity(self, transID):
        """ This method contains the real work
    """
        gLogger.info("-" * 40)
        gLogger.info("Checking the integrity of transformation %s" % transID)
        gLogger.info("-" * 40)

        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            return res
        directories = res['Value']
        if not directories:
            return S_OK()

        ######################################################
        #
        # This check performs Catalog->SE for possible output directories
        #
        res = self.fc.exists(directories)
        if not res['OK']:
            gLogger.error('Failed to check directory existence',
                          res['Message'])
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existence of directory',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR("Failed to determine the existence of directories")
        directoryExists = res['Value']['Successful']
        for directory in sorted(directoryExists.keys()):
            if not directoryExists[directory]:
                continue
            iRes = self.consistencyInspector.catalogDirectoryToSE(directory)
            if not iRes['OK']:
                gLogger.error(iRes['Message'])
                return iRes

        gLogger.info("-" * 40)
        gLogger.info("Completed integrity check for transformation %s" %
                     transID)
        return S_OK()

    def finalizeCheck(self, transID):
        """ Move to 'WaitingIntegrity' or 'ValidatedOutput'
    """
        res = self.integrityClient.getTransformationProblematics(int(transID))

        if not res['OK']:
            gLogger.error(
                "Failed to determine whether there were associated problematic files",
                res['Message'])
            newStatus = ''
        elif res['Value']:
            gLogger.info(
                "%d problematic files for transformation %s were found" %
                (len(res['Value']), transID))
            newStatus = "WaitingIntegrity"
        else:
            gLogger.info("No problematics were found for transformation %s" %
                         transID)
            newStatus = "ValidatedOutput"
        if newStatus:
            res = self.transClient.setTransformationParameter(
                transID, 'Status', newStatus)
            if not res['OK']:
                gLogger.error(
                    "Failed to update status of transformation %s to %s" %
                    (transID, newStatus))
            else:
                gLogger.info("Updated status of transformation %s to %s" %
                             (transID, newStatus))
        gLogger.info("-" * 40)
        return S_OK()
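The directory selection in _addDirs keeps only paths containing the transformation ID zero-padded to eight digits. A standalone sketch of the same rule, with invented directory paths:

import re

def addDirs(transID, newDirs, existingDirs):
    # Same selection rule as _addDirs above: keep directories whose path
    # contains the 8-digit zero-padded transformation ID, without duplicates.
    transStr = str(transID).zfill(8)
    for nDir in newDirs:
        if re.search(transStr, nDir) and nDir not in existingDirs:
            existingDirs.append(nDir)
    return existingDirs

# Only the first two hypothetical directories match transformation 123
print(addDirs(123, ['/lhcb/MC/00000123/SIM',
                    '/lhcb/MC/00000123/DST',
                    '/lhcb/MC/00000999/DST'], []))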
Example No. 7
class CatalogPlugInTestCase(unittest.TestCase):
  """ Base class for the CatalogPlugin test case """

  def setUp(self):
    self.fullMetadata = ['Status', 'CheckSumType', 'OwnerRole', 'CreationDate', 'Checksum', 'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size']
    self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
    self.fileMetadata = self.fullMetadata + ['NumberOfLinks']

    self.catalog = FileCatalog(catalogs=[catalogClientToTest])
    valid = self.catalog.isOK()
    self.assertTrue(valid)
    self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
    self.link = "%s/link" % self.destDir

    # Clean the existing directory
    self.cleanDirectory()
    res = self.catalog.createDirectory(self.destDir)
    returnValue = self.parseResult(res,self.destDir)

    # Register some files to work with
    self.numberOfFiles = 2
    self.files = []
    for i in range(self.numberOfFiles):
      lfn = "%s/testFile_%d" % (self.destDir,i)
      res = self.registerFile(lfn)
      self.assertTrue(res)
      self.files.append(lfn)

  def registerFile(self,lfn):
    pfn = 'protocol://host:port/storage/path%s' % lfn
    size = 10000000
    se = 'DIRAC-storage'
    guid = makeGuid()
    adler = stringAdler(guid)
    fileDict = {}
    fileDict[lfn] = {'PFN':pfn,'Size':size,'SE':se,'GUID':guid,'Checksum':adler}
    res = self.catalog.addFile(fileDict)
    return self.parseResult(res,lfn)

  def parseResult(self,res,path):
    self.assertTrue(res['OK'])
    self.assertTrue(res['Value'])
    self.assertTrue(res['Value']['Successful'])
    self.assertTrue(path in res['Value']['Successful'])
    return res['Value']['Successful'][path]

  def parseError(self,res,path):
    self.assertTrue(res['OK'])
    self.assertTrue(res['Value'])
    self.assertTrue(res['Value']['Failed'])
    self.assertTrue(path in res['Value']['Failed'])
    return res['Value']['Failed'][path]

  def cleanDirectory(self):
    res = self.catalog.exists(self.destDir)
    returnValue = self.parseResult(res,self.destDir)
    if not returnValue:
      return
    res = self.catalog.listDirectory(self.destDir)  
    returnValue = self.parseResult(res,self.destDir)
    toRemove = returnValue['Files'].keys()
    if toRemove:
      self.purgeFiles(toRemove)
    res = self.catalog.removeDirectory(self.destDir)
    returnValue = self.parseResult(res,self.destDir)
    self.assertTrue(returnValue)

  def purgeFiles(self,lfns):
    for lfn in lfns:
      res = self.catalog.getReplicas(lfn,True)
      replicas = self.parseResult(res,lfn)
      for se,pfn in replicas.items():
        repDict = {}
        repDict[lfn] = {'PFN':pfn,'SE':se}
        res = self.catalog.removeReplica(repDict)
        self.parseResult(res,lfn)   
      res = self.catalog.removeFile(lfn)
      self.parseResult(res,lfn)

  def tearDown(self):
    self.cleanDirectory()
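The per-LFN dictionary that registerFile passes to FileCatalog.addFile has a fixed shape; a standalone sketch of it, with invented PFN, GUID and checksum values:

lfn = '/lhcb/test/unit-test/TestCatalogPlugin/testFile_0'
fileDict = {
    lfn: {
        'PFN': 'protocol://host:port/storage/path' + lfn,  # physical file name
        'Size': 10000000,                                  # size in bytes
        'SE': 'DIRAC-storage',                             # storage element name
        'GUID': '0EA1327C-0000-0000-0000-INVENTED0000',    # unique identifier (invented)
        'Checksum': '7a1f3b42',                            # adler32 checksum (invented)
    }
}
# res = catalog.addFile(fileDict)  # would return the usual Successful/Failed split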
Example No. 8
class ValidateOutputDataAgent( AgentModule ):

  def __init__( self, *args, **kwargs ):
    """ c'tor
    """
    AgentModule.__init__( self, *args, **kwargs )

    self.integrityClient = DataIntegrityClient()
    self.fc = FileCatalog()
    self.transClient = TransformationClient()
    self.fileCatalogClient = FileCatalogClient()

    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = agentTSTypes
    else:
      self.transformationTypes = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )

    self.directoryLocations = sorted( self.am_getOption( 'DirectoryLocations', ['TransformationDB',
                                                                                  'MetadataCatalog'] ) )
    self.activeStorages = sorted( self.am_getOption( 'ActiveSEs', [] ) )
    self.transfidmeta = self.am_getOption( 'TransfIDMeta', "TransformationID" )
    self.enableFlag = True

  #############################################################################

  def initialize( self ):
    """ Sets defaults
    """
    # This sets the default proxy to be used, as defined under
    # /Operations/Shifter/DataManager.
    # The shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    gLogger.info( "Will treat the following transformation types: %s" % str( self.transformationTypes ) )
    gLogger.info( "Will search for directories in the following locations: %s" % str( self.directoryLocations ) )
    gLogger.info( "Will check the following storage elements: %s" % str( self.activeStorages ) )
    gLogger.info( "Will use %s as metadata tag name for TransformationID" % self.transfidmeta )
    return S_OK()

  #############################################################################

  def execute( self ):
    """ The VerifyOutputData execution method
    """
    self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
    if self.enableFlag != 'True':
      self.log.info( "VerifyOutputData is disabled by configuration option 'EnableFlag'" )
      return S_OK( 'Disabled via CS flag' )

    gLogger.info( "-" * 40 )
    self.updateWaitingIntegrity()
    gLogger.info( "-" * 40 )

    res = self.transClient.getTransformations( {'Status':'ValidatingOutput', 'Type':self.transformationTypes} )
    if not res['OK']:
      gLogger.error( "Failed to get ValidatingOutput transformations", res['Message'] )
      return res
    transDicts = res['Value']
    if not transDicts:
      gLogger.info( "No transformations found in ValidatingOutput status" )
      return S_OK()
    gLogger.info( "Found %s transformations in ValidatingOutput status" % len( transDicts ) )
    for transDict in transDicts:
      transID = transDict['TransformationID']
      res = self.checkTransformationIntegrity( int( transID ) )
      if not res['OK']:
        gLogger.error( "Failed to perform full integrity check for transformation %d" % transID )
      else:
        self.finalizeCheck( transID )
        gLogger.info( "-" * 40 )
    return S_OK()

  def updateWaitingIntegrity( self ):
    """ Get 'WaitingIntegrity' transformations, update to 'ValidatedOutput'
    """
    gLogger.info( "Looking for transformations in the WaitingIntegrity status to update" )
    res = self.transClient.getTransformations( {'Status':'WaitingIntegrity'} )
    if not res['OK']:
      gLogger.error( "Failed to get WaitingIntegrity transformations", res['Message'] )
      return res
    transDicts = res['Value']
    if not transDicts:
      gLogger.info( "No transformations found in WaitingIntegrity status" )
      return S_OK()
    gLogger.info( "Found %s transformations in WaitingIntegrity status" % len( transDicts ) )
    for transDict in transDicts:
      transID = transDict['TransformationID']
      gLogger.info( "-" * 40 )
      res = self.integrityClient.getTransformationProblematics( int( transID ) )
      if not res['OK']:
        gLogger.error( "Failed to determine waiting problematics for transformation", res['Message'] )
      elif not res['Value']:
        res = self.transClient.setTransformationParameter( transID, 'Status', 'ValidatedOutput' )
        if not res['OK']:
          gLogger.error( "Failed to update status of transformation %s to ValidatedOutput" % ( transID ) )
        else:
          gLogger.info( "Updated status of transformation %s to ValidatedOutput" % ( transID ) )
      else:
        gLogger.info( "%d problematic files for transformation %s were found" % ( len( res['Value'] ), transID ) )
    return S_OK()

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories( self, transID ):
    """ Get the directories for the supplied transformation from the transformation system
    """
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters( transID, ['OutputDirectories'] )
      if not res['OK']:
        gLogger.error( "Failed to obtain transformation directories", res['Message'] )
        return res
      transDirectories = res['Value'].splitlines()
      directories = self._addDirs( transID, transDirectories, directories )

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.fileCatalogClient.findDirectoriesByMetadata( {self.transfidmeta:transID} )
      if not res['OK']:
        gLogger.error( "Failed to obtain metadata catalog directories", res['Message'] )
        return res
      transDirectories = res['Value']
      directories = self._addDirs( transID, transDirectories, directories )
    if not directories:
      gLogger.info( "No output directories found" )
    directories = sorted( directories )
    return S_OK( directories )

  @staticmethod
  def _addDirs( transID, newDirs, existingDirs ):
    for nDir in newDirs:
      transStr = str( transID ).zfill( 8 )
      if re.search( transStr, nDir ):
        if nDir not in existingDirs:
          existingDirs.append( nDir )
    return existingDirs

  #############################################################################
  def checkTransformationIntegrity( self, transID ):
    """ This method contains the real work
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Checking the integrity of transformation %s" % transID )
    gLogger.info( "-" * 40 )

    res = self.getTransformationDirectories( transID )
    if not res['OK']:
      return res
    directories = res['Value']
    if not directories:
      return S_OK()

    ######################################################
    #
    # This check performs Catalog->SE for possible output directories
    #
    res = self.fc.exists( directories )
    if not res['OK']:
      gLogger.error( 'Failed to check directory existence', res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existence of directory', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to determine the existence of directories" )
    directoryExists = res['Value']['Successful']
    for directory in sorted( directoryExists.keys() ):
      if not directoryExists[directory]:
        continue
      iRes = self.integrityClient.catalogDirectoryToSE( directory )
      if not iRes['OK']:
        gLogger.error( iRes['Message'] )
        return iRes

    ######################################################
    #
    # This check performs SE->Catalog for possible output directories
    #
    for storageElementName in sorted( self.activeStorages ):
      res = self.integrityClient.storageDirectoryToCatalog( directories, storageElementName )
      if not res['OK']:
        gLogger.error( res['Message'] )
        return res

    gLogger.info( "-" * 40 )
    gLogger.info( "Completed integrity check for transformation %s" % transID )
    return S_OK()

  def finalizeCheck( self, transID ):
    """ Move to 'WaitingIntegrity' or 'ValidatedOutput'
    """
    res = self.integrityClient.getTransformationProblematics( int( transID ) )
    if not res['OK']:
      gLogger.error( "Failed to determine whether there were associated problematic files", res['Message'] )
      newStatus = ''
    elif res['Value']:
      gLogger.info( "%d problematic files for transformation %s were found" % ( len( res['Value'] ), transID ) )
      newStatus = "WaitingIntegrity"
    else:
      gLogger.info( "No problematics were found for transformation %s" % transID )
      newStatus = "ValidatedOutput"
    if newStatus:
      res = self.transClient.setTransformationParameter( transID, 'Status', newStatus )
      if not res['OK']:
        gLogger.error( "Failed to update status of transformation %s to %s" % ( transID, newStatus ) )
      else:
        gLogger.info( "Updated status of transformation %s to %s" % ( transID, newStatus ) )
    gLogger.info( "-" * 40 )
    return S_OK()
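The finalizeCheck and updateWaitingIntegrity methods implement a small status machine: a failed problematics query leaves the status untouched, outstanding problematics park the transformation in WaitingIntegrity, and a clean check promotes it to ValidatedOutput. An illustrative summary of that rule (the helper itself is not part of the agent):

def nextStatus(problematicCount, checkFailed=False):
    # Summarises the transition logic of finalizeCheck above
    if checkFailed:
        return None  # leave the transformation status unchanged
    return 'WaitingIntegrity' if problematicCount else 'ValidatedOutput'

assert nextStatus(0) == 'ValidatedOutput'
assert nextStatus(5) == 'WaitingIntegrity'
assert nextStatus(5, checkFailed=True) is None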
Example No. 9
class DataIntegrityClient(Client):
    """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """
    def __init__(self, **kwargs):

        super(DataIntegrityClient, self).__init__(**kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(lfn, list):
            lfns = lfn
        elif isinstance(lfn, basestring):
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB"
            )
        return res

    def reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, _se, _reason in sorted(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be a tuple (or a list of tuples) of the form (lfn, pfn, se, prognosis)

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(replicaTuple, tuple):
            replicaTuple = [replicaTuple]
        elif isinstance(replicaTuple, list):
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict.keys():
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)
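    # Illustrative usage of setReplicaProblematic (all values invented):
    #
    #   client = DataIntegrityClient()
    #   replicaTuple = ('/lhcb/data/file1',          # lfn
    #                   'protocol://host/file1',     # pfn, '' if unknown
    #                   'CERN-RAW',                  # se, '' if unknown
    #                   'PFNMissing')                # prognosis
    #   res = client.setReplicaProblematic(replicaTuple, sourceComponent='MyAgent')
    #   # res['Value'] carries the usual Successful/Failed split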

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if catalogSize == bookkeepingSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since this is the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)

    #FIXME: Unused?
    def resolvePFNNotRegistered(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." %
                         fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count"
                % fileID)
            return self.incrementProblematicRetry(fileID)

        # HACK until we can obtain the space token descriptions through GFAL
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site

        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)

        problematicDict['PFN'] = res['Value']

        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    #FIXME: Unused?
    def resolveLFNCatalogMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    #FIXME: Unused?
    def resolvePFNMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" %
                         fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)

        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" %
                         fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        seSite = se.split('_')[0].split('-')[0]
        found = False
        gLogger.debug("Registered replicas: %s" % str(replicas))
        for replicaSE in replicas.keys():
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info(
                "PFNMissing replica (%d) is no longer registered at SE. Resolved."
                % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info(
            "PFNMissing replica (%d) does not exist. Removing from catalog..."
            % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info(
                "PFNMissing replica (%d) had a single replica. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)

    #FIXME: Unused?
    def resolvePFNUnavailable(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            # The file is no longer Unavailable but has now disappeared completely
            gLogger.info(
                "PFNUnavailable replica (%d) found to be missing. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info(
                "PFNUnavailable replica (%d) found to still be Unavailable" %
                fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info(
                "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" %
                     fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)

    #FIXME: Unused?
    def resolvePFNZeroSize(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)

        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info(
                "PFNZeroSize replica (%d) found to be missing. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))

            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info(
                "PFNZeroSize replica (%d) removed. Updating prognosis" %
                problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')

        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info(
                "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info(
                "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)

    ############################################################################################

    #FIXME: Unused?
    def resolveLFNZeroReplicas(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" %
                         fileID)
        else:
            gLogger.info(
                "LFNZeroReplicas file (%d) does not have replicas. Checking storage..."
                % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue(
                        'Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if res['OK'] and lfn in res['Value']:
                    gLogger.info(
                        "LFNZeroReplicas file (%d) found storage file at %s" %
                        (fileID, storageElementName))
                    self.reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName,
                          'PFNNotRegistered')], storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info(
                    "LFNZeroReplicas file (%d) did not have storage files. Removing..."
                    % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file',
                                  res['Message'])
                    # Increment the number of retries for this file
                    self.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" %
                             fileID)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)

    def _reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic
    """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sorted(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')
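
A brief usage sketch, added for illustration and not part of the original example: it assumes DIRAC's standard S_OK/S_ERROR result convention, that getProblematic() returns a single problematic dictionary of the shape the resolvers above consume, and the resolve<Prognosis> method-naming convention visible in this class.

from DIRAC import gLogger

def resolveNextProblematic():
    """ Fetch one problematic record and dispatch it to the matching resolver. """
    client = DataIntegrityClient()
    res = client.getProblematic()
    if not res['OK'] or not res['Value']:
        return res
    problematicDict = res['Value']
    prognosis = problematicDict['Prognosis']
    # Resolver method names follow the resolve<Prognosis> convention used above
    resolver = getattr(client, 'resolve%s' % prognosis, None)
    if resolver is None:
        gLogger.error('No resolver implemented for prognosis', prognosis)
        return client.incrementProblematicRetry(problematicDict['FileID'])
    return resolver(problematicDict)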
Example No. 10
class ValidateOutputDataAgent(DIRACValidateOutputDataAgent):
    """ Simple extension of base class
  """
    def __init__(self, *args, **kwargs):
        """ c'tor
    """
        DIRACValidateOutputDataAgent.__init__(self, *args, **kwargs)

        self.integrityClient = None
        self.fileCatalog = None
        self.transClient = None
        self.storageUsageClient = None

    def initialize(self):
        """ standard initialize method for DIRAC agents
    """
        res = DIRACValidateOutputDataAgent.initialize(self)
        if not res['OK']:
            return res

        self.integrityClient = DataIntegrityClient()
        self.fileCatalog = FileCatalog()
        self.transClient = TransformationClient()
        self.storageUsageClient = StorageUsageClient()

        return S_OK()

    def checkTransformationIntegrity(self, prodID):
        """ This method contains the real work
    """
        gLogger.info("-" * 40)
        gLogger.info("Checking the integrity of production %s" % prodID)
        gLogger.info("-" * 40)

        res = self.getTransformationDirectories(prodID)
        if not res['OK']:
            return res
        directories = res['Value']

        ######################################################
        #
        # This check performs BK->Catalog->SE
        #
        res = self.integrityClient.productionToCatalog(prodID)
        if not res['OK']:
            gLogger.error(res['Message'])
            return res
        bk2catalogMetadata = res['Value']['CatalogMetadata']
        bk2catalogReplicas = res['Value']['CatalogReplicas']
        res = self.integrityClient.checkPhysicalFiles(bk2catalogReplicas,
                                                      bk2catalogMetadata)
        if not res['OK']:
            gLogger.error(res['Message'])
            return res

        if not directories:
            return S_OK()

        ######################################################
        #
        # This check performs Catalog->BK and Catalog->SE for possible output directories
        #
        res = self.fileCatalog.exists(directories)
        if not res['OK']:
            gLogger.error(res['Message'])
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existence of directory',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR("Failed to determine the existence of directories")
        directoryExists = res['Value']['Successful']
        for directory in sorted(directoryExists.keys()):
            if not directoryExists[directory]:
                continue
            iRes = self.integrityClient.catalogDirectoryToBK(directory)
            if not iRes['OK']:
                gLogger.error(iRes['Message'])
                return iRes
            catalogDirMetadata = iRes['Value']['CatalogMetadata']
            catalogDirReplicas = iRes['Value']['CatalogReplicas']
            catalogMetadata = {}
            catalogReplicas = {}
            for lfn in catalogDirMetadata.keys():
                if lfn not in bk2catalogMetadata.keys():
                    catalogMetadata[lfn] = catalogDirMetadata[lfn]
                    if lfn in catalogDirReplicas:
                        catalogReplicas[lfn] = catalogDirReplicas[lfn]
            if not catalogMetadata:
                continue
            res = self.integrityClient.checkPhysicalFiles(
                catalogReplicas, catalogMetadata)
            if not res['OK']:
                gLogger.error(res['Message'])
                return res

        return S_OK()

    def getTransformationDirectories(self, transID):
        """ get the directories for the supplied transformation from the transformation system

    :param self: self reference
    :param int transID: transformation ID
    """

        res = DIRACValidateOutputDataAgent.getTransformationDirectories(
            self, transID)

        if res['OK']:
            directories = res['Value']
        else:
            return res

        if 'StorageUsage' in self.directoryLocations:
            res = self.storageUsageClient.getStorageDirectories(
                '', '', transID, [])
            if not res['OK']:
                self.log.error("Failed to obtain storage usage directories",
                               res['Message'])
                return res
            transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)

        if not directories:
            self.log.info("No output directories found")
        directories = sorted(directories)
        return S_OK(directories)
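
The per-directory filtering step inside checkTransformationIntegrity can be read in isolation. A minimal sketch with a hypothetical helper name: keep only the catalog entries that the BK->Catalog pass has not already covered.

def selectNewCatalogEntries(catalogDirMetadata, catalogDirReplicas, knownMetadata):
    """ Return (metadata, replicas) restricted to LFNs absent from knownMetadata. """
    newMetadata = {}
    newReplicas = {}
    for lfn, metadata in catalogDirMetadata.items():
        if lfn not in knownMetadata:
            newMetadata[lfn] = metadata
            if lfn in catalogDirReplicas:
                newReplicas[lfn] = catalogDirReplicas[lfn]
    return newMetadata, newReplicas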
Example No. 11
class DataIntegrityClient( Client ):

  """  
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB
 
  """

  def __init__( self, **kwargs ):

    Client.__init__( self, **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  ##########################################################################
  #
  # This section contains the specific methods for LFC->SE checks
  #

  def catalogDirectoryToSE( self, lfnDir ):
    """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    if type( lfnDir ) in types.StringTypes:
      lfnDir = [lfnDir]
    res = self.__getCatalogDirectoryContents( lfnDir )
    if not res['OK']:
      return res
    replicas = res['Value']['Replicas']
    catalogMetadata = res['Value']['Metadata']
    res = self.__checkPhysicalFiles( replicas, catalogMetadata )
    if not res['OK']:
      return res
    resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
    return S_OK( resDict )

  def catalogFileToSE( self, lfns ):
    """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    if type( lfns ) in types.StringTypes:
      lfns = [lfns]
    res = self.__getCatalogMetadata( lfns )
    if not res['OK']:
      return res
    catalogMetadata = res['Value']
    res = self.__getCatalogReplicas( catalogMetadata.keys() )
    if not res['OK']:
      return res
    replicas = res['Value']
    res = self.__checkPhysicalFiles( replicas, catalogMetadata )
    if not res['OK']:
      return res
    resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
    return S_OK( resDict )

  def checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ):
    """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    return self.__checkPhysicalFiles( replicas, catalogMetadata, ses = ses )

  def __checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ):
    """ This obtains the physical file metadata and checks the metadata against the catalog entries
    """
    sePfns = {}
    pfnLfns = {}
    for lfn, replicaDict in replicas.items():
      for se, pfn in replicaDict.items():
        if ( ses ) and ( se not in ses ):
          continue
        if not sePfns.has_key( se ):
          sePfns[se] = []
        sePfns[se].append( pfn )
        pfnLfns[pfn] = lfn
    gLogger.info( '%s %s' % ( 'Storage Element'.ljust( 20 ), 'Replicas'.rjust( 20 ) ) )
    for site in sortList( sePfns.keys() ):
      files = len( sePfns[site] )
      gLogger.info( '%s %s' % ( site.ljust( 20 ), str( files ).rjust( 20 ) ) )

    for se in sortList( sePfns.keys() ):
      pfns = sePfns[se]
      pfnDict = {}
      for pfn in pfns:
        pfnDict[pfn] = pfnLfns[pfn]
      sizeMismatch = []
      res = self.__checkPhysicalFileMetadata( pfnDict, se )
      if not res['OK']:
        gLogger.error( 'Failed to get physical file metadata.', res['Message'] )
        return res
      for pfn, metadata in res['Value'].items():
        if catalogMetadata.has_key( pfnLfns[pfn] ):
          if ( metadata['Size'] != catalogMetadata[pfnLfns[pfn]]['Size'] ) and ( metadata['Size'] != 0 ):
            sizeMismatch.append( ( pfnLfns[pfn], pfn, se, 'CatalogPFNSizeMismatch' ) )
      if sizeMismatch:
        self.__reportProblematicReplicas( sizeMismatch, se, 'CatalogPFNSizeMismatch' )
    return S_OK()

  def __checkPhysicalFileMetadata( self, pfnLfns, se ):
    """ Check obtain the physical file metadata and check the files are available
    """
    gLogger.info( 'Checking the integrity of %s physical files at %s' % ( len( pfnLfns ), se ) )


    res = StorageElement( se ).getFileMetadata( pfnLfns.keys() )

    if not res['OK']:
      gLogger.error( 'Failed to get metadata for pfns.', res['Message'] )
      return res
    pfnMetadataDict = res['Value']['Successful']
    # If the replicas are completely missing
    missingReplicas = []
    for pfn, reason in res['Value']['Failed'].items():
      if re.search( 'File does not exist', reason ):
        missingReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNMissing' ) )
    if missingReplicas:
      self.__reportProblematicReplicas( missingReplicas, se, 'PFNMissing' )
    lostReplicas = []
    unavailableReplicas = []
    zeroSizeReplicas = []
    # If the files are not accessible
    for pfn, pfnMetadata in pfnMetadataDict.items():
      if pfnMetadata['Lost']:
        lostReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNLost' ) )
      if pfnMetadata['Unavailable']:
        unavailableReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNUnavailable' ) )
      if pfnMetadata['Size'] == 0:
        zeroSizeReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNZeroSize' ) )
    if lostReplicas:
      self.__reportProblematicReplicas( lostReplicas, se, 'PFNLost' )
    if unavailableReplicas:
      self.__reportProblematicReplicas( unavailableReplicas, se, 'PFNUnavailable' )
    if zeroSizeReplicas:
      self.__reportProblematicReplicas( zeroSizeReplicas, se, 'PFNZeroSize' )
    gLogger.info( 'Checking the integrity of physical files at %s complete' % se )
    return S_OK( pfnMetadataDict )

  ##########################################################################
  #
  # This section contains the specific methods for SE->LFC checks
  #

  def storageDirectoryToCatalog( self, lfnDir, storageElement ):
    """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the SE->LFC check at %s" % storageElement )
    gLogger.info( "-" * 40 )
    if type( lfnDir ) in types.StringTypes:
      lfnDir = [lfnDir]
    res = self.__getStorageDirectoryContents( lfnDir, storageElement )
    if not res['OK']:
      return res
    storageFileMetadata = res['Value']
    if storageFileMetadata:
      return self.__checkCatalogForSEFiles( storageFileMetadata, storageElement )
    return S_OK( {'CatalogMetadata':{}, 'StorageMetadata':{}} )

  def __checkCatalogForSEFiles( self, storageMetadata, storageElement ):
    gLogger.info( 'Checking %s storage files exist in the catalog' % len( storageMetadata ) )

    # RF_NOTE : this comment is completely wrong
    # First get all the PFNs as they should be registered in the catalog
    res = StorageElement( storageElement ).getPfnForProtocol( storageMetadata.keys(), withPort = False )
    if not res['OK']:
      gLogger.error( "Failed to get registered PFNs for physical files", res['Message'] )
      return res
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
    for original, registered in res['Value']['Successful'].items():
      storageMetadata[registered] = storageMetadata.pop( original )
    # Determine whether these PFNs are registered and if so obtain the LFN
    res = self.fc.getLFNForPFN( storageMetadata.keys() )
    if not res['OK']:
      gLogger.error( "Failed to get registered LFNs for PFNs", res['Message'] )
      return res
    failedPfns = res['Value']['Failed']
    notRegisteredPfns = []
    for pfn, error in failedPfns.items():
      if re.search( 'No such file or directory', error ):
        notRegisteredPfns.append( ( storageMetadata[pfn]['LFN'], pfn, storageElement, 'PFNNotRegistered' ) )
        failedPfns.pop( pfn )
    if notRegisteredPfns:
      self.__reportProblematicReplicas( notRegisteredPfns, storageElement, 'PFNNotRegistered' )
    if failedPfns:
      return S_ERROR( 'Failed to obtain LFNs for PFNs' )
    pfnLfns = res['Value']['Successful']
    for pfn in storageMetadata.keys():
      pfnMetadata = storageMetadata.pop( pfn )
      if pfn in pfnLfns.keys():
        lfn = pfnLfns[pfn]
        storageMetadata[lfn] = pfnMetadata
        storageMetadata[lfn]['PFN'] = pfn
    # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
    res = self.__getCatalogMetadata( storageMetadata.keys() )
    if not res['OK']:
      return res
    catalogMetadata = res['Value']
    sizeMismatch = []
    for lfn, lfnCatalogMetadata in catalogMetadata.items():
      lfnStorageMetadata = storageMetadata[lfn]
      if ( lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size'] ) and ( lfnStorageMetadata['Size'] != 0 ):
        sizeMismatch.append( ( lfn, storageMetadata[lfn]['PFN'], storageElement, 'CatalogPFNSizeMismatch' ) )
    if sizeMismatch:
      self.__reportProblematicReplicas( sizeMismatch, storageElement, 'CatalogPFNSizeMismatch' )
    gLogger.info( 'Checking storage files exist in the catalog complete' )
    resDict = {'CatalogMetadata':catalogMetadata, 'StorageMetadata':storageMetadata}
    return S_OK( resDict )

  def getStorageDirectoryContents( self, lfnDir, storageElement ):
    """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element
    """
    return self.__getStorageDirectoryContents( lfnDir, storageElement )

  def __getStorageDirectoryContents( self, lfnDir, storageElement ):
    """ Obtians the contents of the supplied directory on the storage
    """
    gLogger.info( 'Obtaining the contents for %s directories at %s' % ( len( lfnDir ), storageElement ) )

    se = StorageElement( storageElement )
    res = se.getPfnForLfn( lfnDir )

    if not res['OK']:
      gLogger.error( "Failed to get PFNs for directories", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain directory PFN from LFNs', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectories = res['Value']['Successful'].values()
    res = se.exists( storageDirectories )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of directories", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existance of directory', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to determine existance of directory' )
    directoryExists = res['Value']['Successful']
    activeDirs = []
    for directory in sortList( directoryExists.keys() ):
      exists = directoryExists[directory]
      if exists:
        activeDirs.append( directory )
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = se.listDirectory( currentDir )
      activeDirs.remove( currentDir )
      if not res['OK']:
        gLogger.error( 'Failed to get directory contents', res['Message'] )
        return res
      elif res['Value']['Failed'].has_key( currentDir ):
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
        return S_ERROR( res['Value']['Failed'][currentDir] )
      else:
        dirContents = res['Value']['Successful'][currentDir]
        activeDirs.extend( dirContents['SubDirs'] )
        fileMetadata = dirContents['Files']

        # RF_NOTE This ugly trick is needed because se.getPfnPath does not follow the Successful/Failed convention
#         res = { "Successful" : {}, "Failed" : {} }
#         for pfn in fileMetadata:
#           inRes = se.getPfnPath( pfn )
#           if inRes["OK"]:
#             res["Successful"][pfn] = inRes["Value"]
#           else:
#             res["Failed"][pfn] = inRes["Message"]
        res = se.getLfnForPfn( fileMetadata.keys() )
        if not res['OK']:
          gLogger.error( 'Failed to get directory content LFNs', res['Message'] )
          return res

        for pfn, error in res['Value']['Failed'].items():
          gLogger.error( "Failed to get LFN for PFN", "%s %s" % ( pfn, error ) )
        if res['Value']['Failed']:
          return S_ERROR( "Failed to get LFNs for PFNs" )
        pfnLfns = res['Value']['Successful']
        for pfn, lfn in pfnLfns.items():
          fileMetadata[pfn]['LFN'] = lfn
        allFiles.update( fileMetadata )
    zeroSizeFiles = []
    lostFiles = []
    unavailableFiles = []
    for pfn in sortList( allFiles.keys() ):
      if os.path.basename( pfn ) == 'dirac_directory':
        allFiles.pop( pfn )
      else:
        metadata = allFiles[pfn]
        if metadata['Size'] == 0:
          zeroSizeFiles.append( ( metadata['LFN'], pfn, storageElement, 'PFNZeroSize' ) )
        # if metadata['Lost']:
        #  lostFiles.append((metadata['LFN'],pfn,storageElement,'PFNLost'))
        # if metadata['Unavailable']:
        #  unavailableFiles.append((metadata['LFN'],pfn,storageElement,'PFNUnavailable'))
    if zeroSizeFiles:
      self.__reportProblematicReplicas( zeroSizeFiles, storageElement, 'PFNZeroSize' )
    if lostFiles:
      self.__reportProblematicReplicas( lostFiles, storageElement, 'PFNLost' )
    if unavailableFiles:
      self.__reportProblematicReplicas( unavailableFiles, storageElement, 'PFNUnavailable' )
    gLogger.info( 'Obtained a total of %s files for directories at %s' % ( len( allFiles ), storageElement ) )
    return S_OK( allFiles )

  def __getStoragePathExists( self, lfnPaths, storageElement ):
    gLogger.info( 'Determining the existence of %d files at %s' % ( len( lfnPaths ), storageElement ) )

    se = StorageElement( storageElement )
    res = se.getPfnForLfn( lfnPaths )
    if not res['OK']:
      gLogger.error( "Failed to get PFNs for LFNs", res['Message'] )
      return res
    for lfnPath, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain PFN from LFN', '%s %s' % ( lfnPath, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain PFNs from LFNs' )
    lfnPfns = res['Value']['Successful']
    pfnLfns = {}
    for lfn, pfn in lfnPfns.items():
      pfnLfns[pfn] = lfn

    res = se.exists( pfnLfns )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of paths", res['Message'] )
      return res
    for lfnPath, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existance of path', '%s %s' % ( lfnPath, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to determine existance of paths' )
    pathExists = res['Value']['Successful']
    resDict = {}
    for pfn, exists in pathExists.items():
      if exists:
        resDict[pfnLfns[pfn]] = pfn
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the specific methods for obtaining replica and metadata information from the catalog
  #

  def __getCatalogDirectoryContents( self, lfnDir ):
    """ Obtain the contents of the supplied directory
    """
    gLogger.info( 'Obtaining the catalog contents for %s directories' % len( lfnDir ) )

    activeDirs = lfnDir
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.fc.listDirectory( currentDir )
      activeDirs.remove( currentDir )
      if not res['OK']:
        gLogger.error( 'Failed to get directory contents', res['Message'] )
        return res
      elif res['Value']['Failed'].has_key( currentDir ):
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
      else:
        dirContents = res['Value']['Successful'][currentDir]
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )

    zeroReplicaFiles = []
    zeroSizeFiles = []
    allReplicaDict = {}
    allMetadataDict = {}
    for lfn, lfnDict in allFiles.items():
      lfnReplicas = {}
      for se, replicaDict in lfnDict['Replicas'].items():
        lfnReplicas[se] = replicaDict['PFN']
      if not lfnReplicas:
        zeroReplicaFiles.append( lfn )
      allReplicaDict[lfn] = lfnReplicas
      allMetadataDict[lfn] = lfnDict['MetaData']
      if lfnDict['MetaData']['Size'] == 0:
        zeroSizeFiles.append( lfn )
    if zeroReplicaFiles:
      self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
    if zeroSizeFiles:
      self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
    gLogger.info( 'Obtained a total of %s files for the supplied directories' % len( allMetadataDict ) )
    resDict = {'Metadata':allMetadataDict, 'Replicas':allReplicaDict}
    return S_OK( resDict )

  def __getCatalogReplicas( self, lfns ):
    """ Obtain the file replicas from the catalog while checking that there are replicas
    """
    gLogger.info( 'Obtaining the replicas for %s files' % len( lfns ) )

    zeroReplicaFiles = []
    res = self.fc.getReplicas( lfns, allStatus = True )
    if not res['OK']:
      gLogger.error( 'Failed to get catalog replicas', res['Message'] )
      return res
    allReplicas = res['Value']['Successful']
    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'File has zero replicas', error ):
        zeroReplicaFiles.append( lfn )
    if zeroReplicaFiles:
      self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
    gLogger.info( 'Obtaining the replicas for files complete' )
    return S_OK( allReplicas )

  def __getCatalogMetadata( self, lfns ):
    """ Obtain the file metadata from the catalog while checking they exist
    """
    if not lfns:
      return S_OK( {} )
    gLogger.info( 'Obtaining the catalog metadata for %s files' % len( lfns ) )

    missingCatalogFiles = []
    zeroSizeFiles = []
    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      gLogger.error( 'Failed to get catalog metadata', res['Message'] )
      return res
    allMetadata = res['Value']['Successful']
    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'No such file or directory', error ):
        missingCatalogFiles.append( lfn )
    if missingCatalogFiles:
      self.__reportProblematicFiles( missingCatalogFiles, 'LFNCatalogMissing' )
    for lfn, metadata in allMetadata.items():
      if metadata['Size'] == 0:
        zeroSizeFiles.append( lfn )
    if zeroSizeFiles:
      self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
    gLogger.info( 'Obtaining the catalog metadata complete' )
    return S_OK( allMetadata )

  ##########################################################################
  #
  # This section contains the methods for inserting problematic files into the integrity DB
  #

  def __reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sortList( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if type( lfn ) == types.ListType:
      lfns = lfn
    elif type( lfn ) == types.StringType:
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" )
    return res

  def __reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, pfn, se, reason in sortList( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
      else:
        gLogger.info( pfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple(s) should be of the form ( lfn, pfn, se, prognosis ) where:

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if type( replicaTuple ) == types.TupleType:
      replicaTuple = [replicaTuple]
    elif type( replicaTuple ) == types.ListType:
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'

    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #
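  # Each resolver below terminates in one of three ways: __updateCompletedFiles
  # marks the problematic 'Resolved'; incrementProblematicRetry leaves it for a
  # later pass; changeProblematicPrognosis re-queues it under a new prognosis.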

  def __updateCompletedFiles( self, prognosis, fileID ):
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    self.incrementProblematicRetry( fileID )
    gLogger.error( res['Message'] )
    return res

  def __getRegisteredPFNLFN( self, pfn, storageElement ):

    res = StorageElement( storageElement ).getPfnForProtocol( pfn, withPort = False )
    if not res['OK']:
      gLogger.error( "Failed to get registered PFN for physical files", res['Message'] )
      return res
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
      return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
    registeredPFN = res['Value']['Successful'][pfn]
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getLFNForPFN( registeredPFN ) )
    if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ):
      return S_OK( False )
    return S_OK( res['Value'] )

  def __updateReplicaToChecked( self, problematicDict ):
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'

    res = Utils.executeSingleFileOrDirWrapper( self.fc.setReplicaStatus( {lfn:problematicDict} ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']


    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileSize( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = Utils.executeSingleFileOrDirWrapper( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if ( catalogSize == bookkeepingSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )

  def resolvePFNNotRegistered( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      # The file does not exist in the catalog
      res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    res = Utils.executeSingleFileOrDirWrapper( se.getFileMetadata( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    if storageMetadata['Unavailable']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID )
      return self.incrementProblematicRetry( fileID )

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split( '_' )[0].split( '-' )[0]
    if not storageMetadata['Cached']:
      if lfn.endswith( '.raw' ):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site

    problematicDict['SE'] = seName
    res = se.getPfnForProtocol( pfn, withPort = False )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
      return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
    problematicDict['PFN'] = res['Value']['Successful'][pfn]

    res = Utils.executeSingleFileOrDirWrapper( self.fc.addReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )

  def resolveLFNCatalogMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )

  def resolvePFNMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )

    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).exists( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    gLogger.info( "PFNMissing replica (%d) does not exist" % fileID )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    replicas = res['Value']
    seSite = se.split( '_' )[0].split( '-' )[0]
    found = False
    gLogger.debug( 'PFNMissing replicas: %s' % replicas )
    for replicaSE in replicas.keys():
      if re.search( seSite, replicaSE ):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )
    gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.removeReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if len( replicas ) == 1:
      gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' )
    res = self.dm.replicateAndRegister( problematicDict['LFN'], se )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'PFNMissing', fileID )

  def resolvePFNUnavailable( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileMetadata( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      # The file is no longer Unavailable but has now disappeared completely
      gLogger.info( "PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if ( not res['OK'] ) or res['Value']['Unavailable']:
      gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID )
      return self.incrementProblematicRetry( fileID )
    if res['Value']['Lost']:
      gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID )
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked( problematicDict )

  def resolvePFNZeroSize( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )

    res = Utils.executeSingleFileOrDirWrapper( se.getFileSize( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    if storageSize == 0:
      res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) )

      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    res = self.__getRegisteredPFNLFN( pfn, seName )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    lfn = res['Value']
    if not lfn:
      gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNZeroSize', fileID )

  ############################################################################################

  def resolveLFNZeroReplicas( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) )
    if res['OK'] and res['Value']:
      gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID )
    else:
      gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID )
      pfnsFound = False
      for storageElementName in sortList( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ):
        res = self.__getStoragePathExists( [lfn], storageElementName )
        if res['OK'] and res['Value'].has_key( lfn ):
          gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) )
          pfn = res['Value'][lfn]
          self.__reportProblematicReplicas( [( lfn, pfn, storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' )
          pfnsFound = True
      if not pfnsFound:
        gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID )
        res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) )
        if not res['OK']:
          gLogger.error( res['Message'] )
          # Increment the number of retries for this file
          self.incrementProblematicRetry( fileID )
          return res
        gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )
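
A short usage sketch, illustrative only and not from the original source: report a single problematic replica using the ( lfn, pfn, se, prognosis ) tuple form accepted by setReplicaProblematic above; the LFN, PFN and SE values below are placeholders.

from DIRAC import gLogger

def reportOneBadReplica():
  client = DataIntegrityClient()
  # Placeholder values; real ones would come from an integrity check
  replicaTuple = ( '/lhcb/user/s/someuser/file.dst',
                   'srm://storage.example.org/pfn/file.dst',
                   'EXAMPLE-DST', 'PFNMissing' )
  res = client.setReplicaProblematic( [replicaTuple], sourceComponent = 'MyScript' )
  if not res['OK']:
    gLogger.error( 'Failed to report problematic replica', res['Message'] )
  return res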
Example No. 12
class RequestPreparationAgent(AgentModule):
    def initialize(self):
        self.fileCatalog = FileCatalog()
        self.dm = DataManager()
        self.stagerClient = StorageManagerClient()
        self.dataIntegrityClient = DataIntegrityClient()
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()

    def execute(self):
        res = self.prepareNewReplicas()
        return res

    def prepareNewReplicas(self):
        """ This is the first logical task to be executed and manages the New->Waiting transition of the Replicas
    """
        res = self.__getNewReplicas()
        if not res['OK']:
            gLogger.fatal(
                "RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.",
                res['Message'])
            return res
        if not res['Value']:
            gLogger.info("There were no New replicas found")
            return res
        replicas = res['Value']['Replicas']
        replicaIDs = res['Value']['ReplicaIDs']
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation."
            % len(replicaIDs))

        # Check that the files exist in the FileCatalog
        res = self.__getExistingFiles(replicas.keys())
        if not res['OK']:
            return res
        exist = res['Value']['Exist']
        terminal = res['Value']['Missing']
        failed = res['Value']['Failed']
        if not exist:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed to determine the existence of any files'
            )
            return S_OK()
        terminalReplicaIDs = {}
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog."
            % len(exist))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog."
                % len(terminal))

        # Obtain the file sizes from the FileCatalog
        res = self.__getFileSize(exist)
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroSize']
        fileSizes = res['Value']['FileSizes']
        if not fileSizes:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed to determine the sizes of any files'
            )
            return S_OK()
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog."
            % len(fileSizes))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog."
                % len(terminal))

        # Obtain the replicas from the FileCatalog
        res = self.__getFileReplicas(fileSizes.keys())
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroReplicas']
        fileReplicas = res['Value']['Replicas']
        if not fileReplicas:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed to determine replicas for any files'
            )
            return S_OK()
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained replica information for %s file from the FileCatalog."
            % len(fileReplicas))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog."
                % len(terminal))

        # Check the replicas exist at the requested site
        replicaMetadata = []
        for lfn, requestedSEs in replicas.items():
            lfnReplicas = fileReplicas[lfn]
            for requestedSE, replicaID in requestedSEs.items():
                if requestedSE not in lfnReplicas:
                    terminalReplicaIDs[replicaID] = "LFN not registered at requested SE"
                    replicas[lfn].pop(requestedSE)
                else:
                    replicaMetadata.append(
                        (replicaID, lfnReplicas[requestedSE], fileSizes[lfn]))

        # Update the states of the files in the database
        if terminalReplicaIDs:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replicas are terminally failed."
                % len(terminalReplicaIDs))
            res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
            if not res['OK']:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica failures.",
                    res['Message'])
        if replicaMetadata:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replica metadata to be updated."
                % len(replicaMetadata))
            # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
            res = self.stagerClient.updateReplicaInformation(replicaMetadata)
            if not res['OK']:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica metadata.",
                    res['Message'])
        return S_OK()

    def __getNewReplicas(self):
        """ This obtains the New replicas from the Replicas table and for each LFN the requested storage element """
        # First obtain the New replicas from the CacheReplicas table
        res = self.stagerClient.getCacheReplicas({'Status': 'New'})
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getNewReplicas: Failed to get replicas with New status.",
                res['Message'])
            return res
        if not res['Value']:
            gLogger.debug(
                "RequestPreparation.__getNewReplicas: No New replicas found to process."
            )
            return S_OK()
        else:
            gLogger.debug(
                "RequestPreparation.__getNewReplicas: Obtained %s New replicas(s) to process."
                % len(res['Value']))
        replicas = {}
        replicaIDs = {}
        for replicaID, info in res['Value'].items():
            lfn = info['LFN']
            storageElement = info['SE']
            if lfn not in replicas:
                replicas[lfn] = {}
            replicas[lfn][storageElement] = replicaID
            replicaIDs[replicaID] = (lfn, storageElement)
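        # Resulting shape: 'Replicas' maps lfn -> { se: replicaID } while
        # 'ReplicaIDs' maps replicaID -> ( lfn, se )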
        return S_OK({'Replicas': replicas, 'ReplicaIDs': replicaIDs})

    def __getExistingFiles(self, lfns):
        """ This checks that the files exist in the FileCatalog. """
        filesExist = []
        missing = {}
        res = self.fileCatalog.exists(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getExistingFiles: Failed to determine whether files exist.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, exists in res['Value']['Successful'].items():
            if exists:
                filesExist.append(lfn)
            else:
                missing[lfn] = 'LFN not registered in the FileCatalog'
        if missing:
            for lfn, reason in missing.items():
                gLogger.warn(
                    "RequestPreparation.__getExistingFiles: %s" % reason, lfn)
            self.__reportProblematicFiles(missing.keys(),
                                          'LFN-LFC-DoesntExist')
        return S_OK({
            'Exist': filesExist,
            'Missing': missing,
            'Failed': failed
        })

    def __getFileSize(self, lfns):
        """ This obtains the file size from the FileCatalog. """
        fileSizes = {}
        zeroSize = {}
        res = self.fileCatalog.getFileSize(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getFileSize: Failed to get sizes for files.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, size in res['Value']['Successful'].items():
            if size == 0:
                zeroSize[lfn] = "LFN registered with zero size in the FileCatalog"
            else:
                fileSizes[lfn] = size
        if zeroSize:
            for lfn, reason in zeroSize.items():
                gLogger.warn("RequestPreparation.__getFileSize: %s" % reason,
                             lfn)
            self.__reportProblematicFiles(zeroSize.keys(), 'LFN-LFC-ZeroSize')
        return S_OK({
            'FileSizes': fileSizes,
            'ZeroSize': zeroSize,
            'Failed': failed
        })

    def __getFileReplicas(self, lfns):
        """ This obtains the replicas from the FileCatalog. """
        replicas = {}
        noReplicas = {}
        res = self.dm.getActiveReplicas(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getFileReplicas: Failed to obtain file replicas.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, lfnReplicas in res['Value']['Successful'].items():
            if not lfnReplicas:
                noReplicas[lfn] = "LFN registered with zero replicas in the FileCatalog"
            else:
                replicas[lfn] = lfnReplicas
        if noReplicas:
            for lfn, reason in noReplicas.items():
                gLogger.warn(
                    "RequestPreparation.__getFileReplicas: %s" % reason, lfn)
            self.__reportProblematicFiles(noReplicas.keys(),
                                          'LFN-LFC-NoReplicas')
        return S_OK({
            'Replicas': replicas,
            'ZeroReplicas': noReplicas,
            'Failed': failed
        })

    def __reportProblematicFiles(self, lfns, reason):
        # NOTE: reporting is short-circuited here; remove this early return to
        # re-enable the integrity DB reporting below.
        return S_OK()
        res = self.dataIntegrityClient.setFileProblematic(
            lfns, reason, sourceComponent='RequestPreparationAgent')
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__reportProblematicFiles: Failed to report missing files.",
                res['Message'])
            return res
        if res['Value']['Successful']:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files."
                % len(res['Value']['Successful']))
        if res['Value']['Failed']:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files."
                % len(res['Value']['Failed']))
        return res
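
All of the helpers above follow the standard DIRAC return convention: every call yields S_OK with a 'Value' (often split into 'Successful' and 'Failed' maps keyed by LFN) or S_ERROR with a 'Message'. A minimal, self-contained sketch of consuming such a result (the function name is ours, for illustration only):

from DIRAC import S_OK, gLogger

def handleCatalogResult(res):
    # Standard DIRAC pattern: check 'OK' before touching 'Value'.
    if not res['OK']:
        gLogger.error('Call failed', res['Message'])
        return res
    for lfn, value in res['Value']['Successful'].items():
        gLogger.info('Succeeded for %s' % lfn, str(value))
    for lfn, reason in res['Value']['Failed'].items():
        gLogger.warn('Failed for %s' % lfn, str(reason))
    return S_OK()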
Example No. 13
class CatalogPlugInTestCase(unittest.TestCase):
    """ Base class for the CatalogPlugin test case """

    def setUp(self):
        self.fullMetadata = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        pfn = "protocol://host:port/storage/path%s" % lfn
        size = 10000000
        se = "DIRAC-storage"
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {"PFN": pfn, "Size": size, "SE": se, "GUID": guid, "Checksum": adler}
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        self.assert_(res["OK"])
        self.assert_(res["Value"])
        self.assert_(res["Value"]["Successful"])
        self.assert_(res["Value"]["Successful"].has_key(path))
        return res["Value"]["Successful"][path]

    def parseError(self, res, path):
        self.assert_(res["OK"])
        self.assert_(res["Value"])
        self.assert_(res["Value"]["Failed"])
        self.assert_(res["Value"]["Failed"].has_key(path))
        return res["Value"]["Failed"][path]

    def cleanDirectory(self):
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = returnValue["Files"].keys()
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {"PFN": pfn, "SE": se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
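
The base class above expects a module-level catalogClientToTest naming the catalog plug-in under test. A minimal sketch of how a concrete suite might be wired up (the plug-in name is an assumption for illustration):

import unittest

catalogClientToTest = 'FileCatalogDB'  # hypothetical plug-in name

class DirectoryTestCase(CatalogPlugInTestCase):
    def test_exists(self):
        # setUp already created self.destDir; verify the catalog reports it
        res = self.catalog.exists(self.destDir)
        self.assertTrue(self.parseResult(res, self.destDir))

if __name__ == '__main__':
    unittest.main()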
Example No. 14
class DataIntegrityClient( Client ):

  """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """

  def __init__( self, **kwargs ):

    super(DataIntegrityClient, self).__init__( **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( lfn, list ):
      lfns = lfn
    elif isinstance( lfn, basestring ):
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" )
    return res
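
  # Usage sketch (added for illustration; the LFN and component name are hypothetical):
  #   client = DataIntegrityClient()
  #   res = client.setFileProblematic( '/lhcb/test/file.dst', 'LFNCatalogMissing',
  #                                    sourceComponent = 'MyCheckingAgent' )
  #   if not res['OK']:
  #     gLogger.error( res['Message'] )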

  def reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, _pfn, _se, _reason in sorted( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be a tuple, or list of tuples, of the form ( lfn, pfn, se, prognosis )

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( replicaTuple, tuple ):
      replicaTuple = [replicaTuple]
    elif isinstance( replicaTuple, list ):
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'

    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )
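
  # Usage sketch (added for illustration; values are hypothetical): each entry is
  # a ( lfn, pfn, se, prognosis ) tuple as described in the docstring above.
  #   replicaTuple = ( '/lhcb/test/file.dst', '', 'CERN-DST', 'PFNMissing' )
  #   res = client.setReplicaProblematic( [replicaTuple], sourceComponent = 'MyCheckingAgent' )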

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #

  def __updateCompletedFiles( self, prognosis, fileID ):
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    self.incrementProblematicRetry( fileID )
    gLogger.error( 'DataIntegrityClient failure', res['Message'] )
    return res

  def __updateReplicaToChecked( self, problematicDict ):
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'

    res = returnSingleResult( self.fc.setReplicaStatus( {lfn:problematicDict} ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']


    res = returnSingleResult( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = returnSingleResult( StorageElement( se ).getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = returnSingleResult( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if catalogSize == bookkeepingSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = returnSingleResult( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )
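
  # Summary of the decision logic above (comment added for clarity):
  #   catalog == bookkeeping == storage  -> mark the replica Checked / Resolved
  #   catalog == bookkeeping != storage  -> remove the bad replica if others exist
  #   bookkeeping == storage != catalog  -> re-prognose as BKCatalogSizeMismatch
  #   all three sizes differ             -> increment the retry count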

  #FIXME: Unused?
  def resolvePFNNotRegistered( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )
    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      # The file does not exist in the catalog
      res = returnSingleResult( se.removeFile( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    res = returnSingleResult( se.getFileMetadata( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    if storageMetadata['Unavailable']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID )
      return self.incrementProblematicRetry( fileID )

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split( '_' )[0].split( '-' )[0]
    if not storageMetadata['Cached']:
      if lfn.endswith( '.raw' ):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site

    problematicDict['SE'] = seName
    res = returnSingleResult( se.getURL( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )

    problematicDict['PFN'] = res['Value']

    res = returnSingleResult( self.fc.addReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    res = returnSingleResult( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )

  #FIXME: Unused?
  def resolveLFNCatalogMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = returnSingleResult( self.fc.removeFile( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )

  #FIXME: Unused?
  def resolvePFNMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )

    res = returnSingleResult( StorageElement( se ).exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    gLogger.info( "PFNMissing replica (%d) does not exist" % fileID )
    res = returnSingleResult( self.fc.getReplicas( lfn, allStatus = True ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    replicas = res['Value']
    seSite = se.split( '_' )[0].split( '-' )[0]
    found = False
    gLogger.debug( 'Registered replicas: %s' % replicas )
    for replicaSE in replicas.keys():
      if re.search( seSite, replicaSE ):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )
    gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID )
    res = returnSingleResult( self.fc.removeReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if len( replicas ) == 1:
      gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' )
    res = self.dm.replicateAndRegister( problematicDict['LFN'], se )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'PFNMissing', fileID )

  #FIXME: Unused?
  def resolvePFNUnavailable( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = returnSingleResult( StorageElement( se ).getFileMetadata( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      # The file is no longer Unavailable but has now disappeared completely
      gLogger.info( "PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if ( not res['OK'] ) or res['Value']['Unavailable']:
      gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID )
      return self.incrementProblematicRetry( fileID )
    if res['Value']['Lost']:
      gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID )
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked( problematicDict )

  #FIXME: Unused?
  def resolvePFNZeroSize( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )

    res = returnSingleResult( se.getFileSize( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    if storageSize == 0:
      res = returnSingleResult( se.removeFile( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )

    res = returnSingleResult( self.fc.getReplicas( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if seName not in res['Value']:
      gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' )
    res = returnSingleResult( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNZeroSize', fileID )
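
  # Summary of the checks above (comment added for clarity): a missing storage
  # file is re-prognosed as PFNMissing, a genuinely zero-size file is removed and
  # re-prognosed as PFNMissing, an unregistered replica becomes PFNNotRegistered,
  # a storage/catalog size mismatch becomes CatalogPFNSizeMismatch, and only a
  # consistent non-zero size resolves the problematic.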

  ############################################################################################

  #FIXME: Unused?
  def resolveLFNZeroReplicas( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.getReplicas( lfn, allStatus = True ) )
    if res['OK'] and res['Value']:
      gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID )
    else:
      gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID )
      pfnsFound = False
      for storageElementName in sorted( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ):
        res = self.__getStoragePathExists( [lfn], storageElementName )
        if res['OK'] and lfn in res['Value']:
          gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) )
          self.reportProblematicReplicas( [( lfn, 'deprecatedUrl', storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' )
          pfnsFound = True
      if not pfnsFound:
        gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID )
        res = returnSingleResult( self.fc.removeFile( lfn ) )
        if not res['OK']:
          gLogger.error( 'DataIntegrityClient: failed to remove file', res['Message'] )
          # Increment the number of retries for this file
          self.incrementProblematicRetry( fileID )
          return res
        gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )


  def _reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic
    """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sorted( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )
  directories = []
  for inputFileName in inputNames:
    try:
      with open( inputFileName, 'r' ) as inputFile:
        directories += inputFile.read().splitlines()
    except IOError:
      # Not a readable file; treat the argument itself as a directory path
      directories.append( inputFileName )

  ######################################################
  #
  # This check performs Catalog->BK and Catalog->SE for possible output directories
  #
  res = fc.exists( directories )
  if not res['OK']:
    gLogger.error( res['Message'] )
    DIRAC.exit( -2 )
  for directory, error in res['Value']['Failed'].items():
    gLogger.error( 'Failed to determine existence of directory', '%s %s' % ( directory, error ) )
  if res['Value']['Failed']:
    DIRAC.exit( -2 )
  directoryExists = res['Value']['Successful']
  for directory in sorted( directoryExists ):
    if not directoryExists[directory]:
      continue
    gLogger.info( "Checking the integrity of %s" % directory )
    iRes = integrity.catalogDirectoryToBK( directory )
    if not iRes['OK']:
      gLogger.error( 'Error getting directory content:', iRes['Message'] )