def getOutgoingFiles(self, transferDict):
    """Get the list of files to be processed from InputPath.

    :param dict transferDict: transfer description; the ``InputFC`` and ``InputPath``
        keys are read. When ``InputFC`` is ``'LocalDisk'`` the path is listed on the
        local file system, otherwise it is listed through a ``FileCatalog`` created
        for that catalog name.
    :return: list of file names found directly under ``InputPath``; sub-directories
        are ignored. An empty list is returned when the listing fails.
    """
    inputFCName = transferDict['InputFC']
    inputPath = transferDict['InputPath']

    if inputFCName == 'LocalDisk':
        try:
            entries = os.listdir(inputPath)
        except (OSError, TypeError) as error:
            # Still best effort (a missing or unreadable directory yields no files),
            # but the reason is logged instead of being silently discarded, and
            # unrelated exceptions are no longer swallowed.
            self.log.error('Failed to list local directory %s: %s' % (inputPath, error))
            return []
        return [entry for entry in entries
                if os.path.isfile(os.path.join(inputPath, entry))]

    inputFC = FileCatalog([inputFCName])
    result = inputFC.listDirectory(inputPath, True)
    if not result['OK']:
        self.log.error(result['Message'])
        return []
    if inputPath not in result['Value']['Successful']:
        self.log.error(result['Value']['Failed'][inputPath])
        return []

    listing = result['Value']['Successful'][inputPath]
    for subDir in listing['SubDirs']:
        self.log.info('Ignoring subdirectory:', subDir)
    # list(...) yields the keys and is a real list on every Python version
    return list(listing['Files'])
def __getCatalogDirectoryContents(self, directories):
    """Get the catalog contents under the paths in ``directories``.

    Every directory is listed through a ``FileCatalog``; the sub-directories it
    reports are queued and listed in turn. A directory that cannot be listed is
    logged and skipped.

    :param self: self reference
    :param list directories: list of paths in catalog (left unmodified)
    :return: S_OK wrapping the list of file keys collected from all listed directories
    """
    self.log.info('Obtaining the catalog contents for %d directories:' % len(directories))
    for directory in directories:
        self.log.info(directory)

    # Walk a copy: the queue is consumed and extended below, and aliasing the
    # argument would leave the caller's list emptied after the call.
    activeDirs = list(directories)
    allFiles = {}
    fc = FileCatalog()
    while activeDirs:
        currentDir = activeDirs.pop(0)
        res = returnSingleResult(fc.listDirectory(currentDir))
        if res['OK']:
            dirContents = res['Value']
            activeDirs.extend(dirContents['SubDirs'])
            allFiles.update(dirContents['Files'])
        elif 'Directory does not exist' in res['Message']:  # FIXME: DFC should return errno
            self.log.info("The supplied directory %s does not exist" % currentDir)
        elif "No such file or directory" in res['Message']:
            self.log.info("%s: %s" % (currentDir, res['Message']))
        else:
            self.log.error("Failed to get directory %s content: %s" % (currentDir, res['Message']))

    self.log.info("Found %d files" % len(allFiles))
    return S_OK(list(allFiles))
def getOutgoingFiles(self, transferDict):
    """Get the list of files to be processed from InputPath.

    :param dict transferDict: transfer description; reads the ``InputFC`` (catalog
        name, or ``'LocalDisk'`` for the local file system) and ``InputPath`` keys.
    :return: list of the files found directly under ``InputPath`` (sub-directories
        are ignored); an empty list if the catalog listing fails.
    """
    inputFCName = transferDict['InputFC']
    inputPath = transferDict['InputPath']

    if inputFCName == 'LocalDisk':
        files = []
        try:
            for fileName in os.listdir(inputPath):
                if os.path.isfile(os.path.join(inputPath, fileName)):
                    files.append(fileName)
        except (OSError, TypeError) as error:
            # Best effort is kept (whatever was collected is returned), but only
            # listing errors are caught and they are reported rather than hidden.
            self.log.error('Failed to list local directory %s: %s' % (inputPath, error))
        return files

    inputFC = FileCatalog([inputFCName])
    result = inputFC.listDirectory(inputPath, True)
    if not result['OK']:
        self.log.error(result['Message'])
        return []

    successful = result['Value']['Successful']
    if inputPath not in successful:
        self.log.error(result['Value']['Failed'][inputPath])
        return []

    # Only the files directly in InputPath are returned
    for subDir in successful[inputPath]['SubDirs']:
        self.log.info('Ignoring subdirectory:', subDir)
    return list(successful[inputPath]['Files'])
def __getCatalogDirectoryContents(self, directories):
    """Get the catalog contents under the paths in ``directories``.

    Directories are taken from the front of a work queue and listed through a
    ``FileCatalog``; sub-directories found on the way are appended to the queue.
    Listing failures are logged and the directory is skipped.

    :param self: self reference
    :param list directories: list of paths in catalog; not modified by this call
    :return: S_OK wrapping the list of file keys gathered from the listed directories
    """
    self.log.info('Obtaining the catalog contents for %d directories:' % len(directories))
    for directory in directories:
        self.log.info(directory)

    # The queue must be a private copy, otherwise draining it below would also
    # drain the list object owned by the caller.
    pendingDirs = list(directories)
    allFiles = {}
    fc = FileCatalog()
    while pendingDirs:
        currentDir = pendingDirs.pop(0)
        res = returnSingleResult(fc.listDirectory(currentDir))
        if not res['OK'] and 'Directory does not exist' in res['Message']:
            # FIXME: DFC should return errno
            self.log.info("The supplied directory %s does not exist" % currentDir)
        elif not res['OK']:
            if "No such file or directory" in res['Message']:
                self.log.info("%s: %s" % (currentDir, res['Message']))
            else:
                self.log.error("Failed to get directory %s content: %s" % (currentDir, res['Message']))
        else:
            dirContents = res['Value']
            pendingDirs.extend(dirContents['SubDirs'])
            allFiles.update(dirContents['Files'])

    self.log.info("Found %d files" % len(allFiles))
    return S_OK(list(allFiles))
class DataIntegrityClient(Client):
    """Client of the DataManagement/DataIntegrity service.

    On top of the service calls it provides catalog->storage and storage->catalog
    consistency checks, reporting of problematic files/replicas to the integrity DB
    and one ``resolve<Prognosis>`` method per prognosis.

    The following methods are supported in the service but are not mentioned explicitly here:

        getProblematic()
            Obtains a problematic file from the IntegrityDB based on the LastUpdate time
        getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB
        getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found
        getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB
        getTransformationProblematics(prodID)
            Obtains the problematics for a given production
        incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID
        changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis
        setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB
        removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB
        insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB
    """

    def __init__(self, **kwargs):
        """Point the client at DataManagement/DataIntegrity and create the helpers it uses."""
        Client.__init__(self, **kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    ##########################################################################
    #
    # This section contains the specific methods for LFC->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """ This obtains the replica and metadata information from the catalog for the
            supplied directory and checks against the storage elements.

            :param lfnDir: a directory path or a list of directory paths
            :return: S_OK({'CatalogMetadata': ..., 'CatalogReplicas': ...}) or the failing result
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if isinstance(lfnDir, types.StringTypes):
            lfnDir = [lfnDir]
        res = self.__getCatalogDirectoryContents(lfnDir)
        if not res['OK']:
            return res
        replicas = res['Value']['Replicas']
        catalogMetadata = res['Value']['Metadata']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {'CatalogMetadata': catalogMetadata, 'CatalogReplicas': replicas}
        return S_OK(resDict)

    def catalogFileToSE(self, lfns):
        """ This obtains the replica and metadata information from the catalog and
            checks against the storage elements.

            :param lfns: an LFN or a list of LFNs
            :return: S_OK({'CatalogMetadata': ..., 'CatalogReplicas': ...}) or the failing result
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if isinstance(lfns, types.StringTypes):
            lfns = [lfns]
        res = self.__getCatalogMetadata(lfns)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        res = self.__getCatalogReplicas(list(catalogMetadata))
        if not res['OK']:
            return res
        replicas = res['Value']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {'CatalogMetadata': catalogMetadata, 'CatalogReplicas': replicas}
        return S_OK(resDict)

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
        """ This takes the supplied replica and metadata information obtained from the
            catalog and checks against the storage elements.

            :param dict replicas: {lfn: {se: url}}
            :param dict catalogMetadata: {lfn: metadata}; the 'Size' entry is compared
            :param list ses: if given and non-empty, only these storage elements are checked
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        return self.__checkPhysicalFiles(replicas, catalogMetadata, ses=ses)

    def __checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
        """ This obtains the physical file metadata and checks the metadata against the
            catalog entries.

            Replicas whose storage size is non-zero and differs from the catalog size are
            reported as 'CatalogPFNSizeMismatch'.
        """
        # Group the LFNs per storage element
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, _url in replicaDict.items():
                if (ses) and (se not in ses):
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info('%s %s' % ('Storage Element'.ljust(20), 'Replicas'.rjust(20)))

        for se in sortList(seLfns):
            files = len(seLfns[se])
            gLogger.info('%s %s' % (se.ljust(20), str(files).rjust(20)))

            lfns = seLfns[se]
            sizeMismatch = []
            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res['OK']:
                gLogger.error('Failed to get physical file metadata.', res['Message'])
                return res
            for lfn, metadata in res['Value'].items():
                if lfn in catalogMetadata:
                    # Zero-size replicas are reported separately as PFNZeroSize
                    if (metadata['Size'] != catalogMetadata[lfn]['Size']) and (metadata['Size'] != 0):
                        sizeMismatch.append((lfn, 'deprecatedUrl', se, 'CatalogPFNSizeMismatch'))
            if sizeMismatch:
                self.__reportProblematicReplicas(sizeMismatch, se, 'CatalogPFNSizeMismatch')
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """ Obtain the physical file metadata and check the files are available.

            Missing, lost, unavailable and zero-size replicas are reported to the
            integrity DB under the 'PFNMissing', 'PFNLost', 'PFNUnavailable' and
            'PFNZeroSize' prognoses.

            :return: S_OK({lfn: metadata}) for the files the storage element answered for
        """
        gLogger.info('Checking the integrity of %s physical files at %s' % (len(lfns), se))

        res = StorageElement(se).getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get metadata for lfns.', res['Message'])
            return res
        lfnMetadataDict = res['Value']['Successful']

        # If the replicas are completely missing
        missingReplicas = []
        for lfn, reason in res['Value']['Failed'].items():
            if re.search('File does not exist', reason):
                missingReplicas.append((lfn, 'deprecatedUrl', se, 'PFNMissing'))
        if missingReplicas:
            self.__reportProblematicReplicas(missingReplicas, se, 'PFNMissing')

        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, lfnMetadata in lfnMetadataDict.items():
            if lfnMetadata['Lost']:
                lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost'))
            if lfnMetadata['Unavailable']:
                unavailableReplicas.append((lfn, 'deprecatedUrl', se, 'PFNUnavailable'))
            if lfnMetadata['Size'] == 0:
                zeroSizeReplicas.append((lfn, 'deprecatedUrl', se, 'PFNZeroSize'))
        if lostReplicas:
            self.__reportProblematicReplicas(lostReplicas, se, 'PFNLost')
        if unavailableReplicas:
            self.__reportProblematicReplicas(unavailableReplicas, se, 'PFNUnavailable')
        if zeroSizeReplicas:
            self.__reportProblematicReplicas(zeroSizeReplicas, se, 'PFNZeroSize')
        gLogger.info('Checking the integrity of physical files at %s complete' % se)
        return S_OK(lfnMetadataDict)

    ##########################################################################
    #
    # This section contains the specific methods for SE->LFC checks
    #

    def storageDirectoryToCatalog(self, lfnDir, storageElement):
        """ This obtains the files found on the storage element in the supplied
            directories, determines whether they exist in the catalog and checks their
            metadata elements.

            :param lfnDir: a directory path or a list of directory paths
            :param str storageElement: name of the storage element to scan
            :return: S_OK({'CatalogMetadata': ..., 'StorageMetadata': ...}) or the failing result
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the SE->LFC check at %s" % storageElement)
        gLogger.info("-" * 40)
        if isinstance(lfnDir, types.StringTypes):
            lfnDir = [lfnDir]
        res = self.__getStorageDirectoryContents(lfnDir, storageElement)
        if not res['OK']:
            return res
        storageFileMetadata = res['Value']
        if storageFileMetadata:
            return self.__checkCatalogForSEFiles(storageFileMetadata, storageElement)
        return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}})

    def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
        """ Check that the files found on the storage are registered in the catalog at
            that storage element and that the catalog size matches the storage size.

            :param dict storageMetadata: {lfn: storage metadata}
            :param str storageElement: name of the storage element the files were found at
        """
        gLogger.info('Checking %s storage files exist in the catalog' % len(storageMetadata))

        res = self.fc.getReplicas(storageMetadata)
        if not res['OK']:
            gLogger.error("Failed to get replicas for LFN", res['Message'])
            return res
        failedLfns = res['Value']['Failed']
        successfulLfns = res['Value']['Successful']
        notRegisteredLfns = []

        for lfn in storageMetadata:
            if lfn in failedLfns:
                if 'No such file or directory' in failedLfns[lfn]:
                    notRegisteredLfns.append((lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))
                    # Accounted for: whatever remains in failedLfns is a genuine failure
                    failedLfns.pop(lfn)
            elif storageElement not in successfulLfns[lfn]:
                notRegisteredLfns.append((lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))

        if notRegisteredLfns:
            self.__reportProblematicReplicas(notRegisteredLfns, storageElement, 'LFNNotRegistered')
        if failedLfns:
            return S_ERROR('Failed to obtain replicas')

        # For the LFNs found to be registered obtain the file metadata from the catalog
        # and verify against the storage metadata
        res = self.__getCatalogMetadata(storageMetadata)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        sizeMismatch = []
        for lfn, lfnCatalogMetadata in catalogMetadata.items():
            lfnStorageMetadata = storageMetadata[lfn]
            if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and (lfnStorageMetadata['Size'] != 0):
                sizeMismatch.append((lfn, 'deprecatedUrl', storageElement, 'CatalogPFNSizeMismatch'))
        if sizeMismatch:
            self.__reportProblematicReplicas(sizeMismatch, storageElement, 'CatalogPFNSizeMismatch')
        gLogger.info('Checking storage files exist in the catalog complete')
        resDict = {'CatalogMetadata': catalogMetadata, 'StorageMetadata': storageMetadata}
        return S_OK(resDict)

    def getStorageDirectoryContents(self, lfnDir, storageElement):
        """ This takes the supplied lfn directories and recursively obtains the files in
            the supplied storage element.
        """
        return self.__getStorageDirectoryContents(lfnDir, storageElement)

    def __getStorageDirectoryContents(self, lfnDir, storageElement):
        """ Obtains the contents of the supplied directories on the storage.

            Zero-size files are reported as 'PFNZeroSize'; files named 'dirac_directory'
            are dropped from the result.

            :return: S_OK({lfn: file metadata}) or an error as soon as a listing fails
        """
        gLogger.info('Obtaining the contents for %s directories at %s' % (len(lfnDir), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnDir)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of directories", res['Message'])
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of directory', '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of directory')
        directoryExists = res['Value']['Successful']
        activeDirs = []
        for directory in sorted(directoryExists):
            exists = directoryExists[directory]
            if exists:
                activeDirs.append(directory)

        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = se.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents', res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error('Failed to get directory contents',
                              '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
                return S_ERROR(res['Value']['Failed'][currentDir])
            else:
                dirContents = res['Value']['Successful'][currentDir]
                # NOTE(review): 'Successful' is iterated directly, so if it is a dict its
                # keys are what gets queued - confirm these are the paths listDirectory expects.
                activeDirs.extend(
                    se.getLFNFromURL(dirContents['SubDirs']).get('Value', {}).get('Successful', []))
                fileURLMetadata = dirContents['Files']
                fileMetadata = {}
                res = se.getLFNFromURL(fileURLMetadata)
                if not res['OK']:
                    gLogger.error('Failed to get directory content LFNs', res['Message'])
                    return res

                for url, error in res['Value']['Failed'].items():
                    gLogger.error("Failed to get LFN for URL", "%s %s" % (url, error))
                if res['Value']['Failed']:
                    return S_ERROR("Failed to get LFNs for PFNs")
                urlLfns = res['Value']['Successful']
                for urlLfn, lfn in urlLfns.items():
                    fileMetadata[lfn] = fileURLMetadata[urlLfn]
                allFiles.update(fileMetadata)

        zeroSizeFiles = []

        # sorted() iterates over a copy of the keys, so popping from allFiles is safe
        for lfn in sorted(allFiles):
            if os.path.basename(lfn) == 'dirac_directory':
                allFiles.pop(lfn)
            else:
                metadata = allFiles[lfn]
                if metadata['Size'] == 0:
                    zeroSizeFiles.append((lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize'))
        if zeroSizeFiles:
            self.__reportProblematicReplicas(zeroSizeFiles, storageElement, 'PFNZeroSize')

        gLogger.info('Obtained at total of %s files for directories at %s' % (len(allFiles), storageElement))
        return S_OK(allFiles)

    def __getStoragePathExists(self, lfnPaths, storageElement):
        """ Determine which of the supplied paths exist on the storage element.

            :return: S_OK({lfn: True}) holding only the paths that exist, or an error if
                     the existence of any path could not be determined
        """
        gLogger.info('Determining the existance of %d files at %s' % (len(lfnPaths), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnPaths)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of paths", res['Message'])
            return res
        for lfnPath, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of path', '%s %s' % (lfnPath, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of paths')
        pathExists = res['Value']['Successful']
        resDict = {}
        for lfn, exists in pathExists.items():
            if exists:
                resDict[lfn] = True
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the specific methods for obtaining replica and metadata information from the catalog
    #

    def __getCatalogDirectoryContents(self, lfnDir):
        """ Obtain the contents of the supplied directories, descending into sub-directories.

            Files without replicas are reported as 'LFNZeroReplicas' and files of size
            zero as 'LFNZeroSize'.

            :param list lfnDir: list of directory paths (left unmodified)
            :return: S_OK({'Metadata': {lfn: metadata}, 'Replicas': {lfn: {se: pfn}}})
        """
        gLogger.info('Obtaining the catalog contents for %s directories' % len(lfnDir))

        # Work on a copy: the queue is consumed below and must not empty the caller's list
        activeDirs = list(lfnDir)
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs.pop(0)
            res = self.fc.listDirectory(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents', res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error('Failed to get directory contents',
                              '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])

        zeroReplicaFiles = []
        zeroSizeFiles = []
        allReplicaDict = {}
        allMetadataDict = {}
        for lfn, lfnDict in allFiles.items():
            lfnReplicas = {}
            for se, replicaDict in lfnDict['Replicas'].items():
                lfnReplicas[se] = replicaDict['PFN']
            if not lfnReplicas:
                zeroReplicaFiles.append(lfn)
            allReplicaDict[lfn] = lfnReplicas
            allMetadataDict[lfn] = lfnDict['MetaData']
            if lfnDict['MetaData']['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info('Obtained at total of %s files for the supplied directories' % len(allMetadataDict))
        resDict = {'Metadata': allMetadataDict, 'Replicas': allReplicaDict}
        return S_OK(resDict)

    def __getCatalogReplicas(self, lfns):
        """ Obtain the file replicas from the catalog while checking that there are replicas.

            Files failing with 'File has zero replicas' are reported as 'LFNZeroReplicas'.

            :return: S_OK with the 'Successful' part of the catalog answer
        """
        gLogger.info('Obtaining the replicas for %s files' % len(lfns))

        zeroReplicaFiles = []
        res = self.fc.getReplicas(lfns, allStatus=True)
        if not res['OK']:
            gLogger.error('Failed to get catalog replicas', res['Message'])
            return res
        allReplicas = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('File has zero replicas', error):
                zeroReplicaFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        gLogger.info('Obtaining the replicas for files complete')
        return S_OK(allReplicas)

    def __getCatalogMetadata(self, lfns):
        """ Obtain the file metadata from the catalog while checking they exist.

            Files failing with 'No such file or directory' are reported as
            'LFNCatalogMissing' and files of size zero as 'LFNZeroSize'.

            :return: S_OK({lfn: metadata}); S_OK({}) straight away when no LFN is given
        """
        if not lfns:
            return S_OK({})
        gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns))

        missingCatalogFiles = []
        zeroSizeFiles = []
        res = self.fc.getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get catalog metadata', res['Message'])
            return res
        allMetadata = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('No such file or directory', error):
                missingCatalogFiles.append(lfn)
        if missingCatalogFiles:
            self.__reportProblematicFiles(missingCatalogFiles, 'LFNCatalogMissing')
        for lfn, metadata in allMetadata.items():
            if metadata['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info('Obtaining the catalog metadata complete')
        return S_OK(allMetadata)

    ##########################################################################
    #
    # This section contains the methods for inserting problematic files into the integrity DB
    #

    def __reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic: logs the files and the outcome. """
        gLogger.info('The following %s files were found with %s' % (len(lfns), reason))
        for lfn in sortList(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns, reason, sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files', res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method inserts the supplied file(s) as problematic into the IntegrityDB

            lfn - the lfn of the file, or a list of lfns
            reason - this is given to the integrity DB and should reflect the problem observed with the file
            sourceComponent is the component issuing the request.

            :return: the result of insertProblematic, or S_ERROR for an unsupported lfn type
        """
        if isinstance(lfn, types.ListType):
            lfns = lfn
        elif isinstance(lfn, types.StringTypes):
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info("DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len(lfns))
        fileMetadata = {}
        for fileLfn in lfns:
            fileMetadata[fileLfn] = {'Prognosis': reason, 'LFN': fileLfn, 'PFN': '', 'SE': ''}
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            # The message used to name setReplicaProblematic, pointing at the wrong method
            gLogger.error("DataIntegrityClient.setFileProblematic: Failed to insert problematics to integrity DB")
        return res

    def __reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic: logs the replicas and the outcome. """
        gLogger.info('The following %s files had %s at %s' % (len(replicaTuple), reason, se))
        for lfn, _pfn, _se, _reason in sortList(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple, sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas', res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB

            replicaTuple is a (lfn, pfn, se, prognosis) tuple or a list of such tuples

            lfn - the lfn of the file
            pfn - the pfn if available (otherwise '')
            se - the storage element of the problematic replica (otherwise '')
            prognosis - this is given to the integrity DB and should reflect the problem observed with the file

            sourceComponent is the component issuing the request.

            :return: S_OK({'Successful': ..., 'Failed': ...}) from the catalog status update,
                     or the failing result
        """
        if isinstance(replicaTuple, types.TupleType):
            replicaTuple = [replicaTuple]
        elif isinstance(replicaTuple, types.ListType):
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info("DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {'Prognosis': reason, 'LFN': lfn, 'PFN': pfn, 'SE': se}
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error("DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB")
            return res
        for lfn in replicaDict.keys():
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        """ Log the resolution and set the problematic entry to 'Resolved'. """
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        """ Increment the retry count of the entry, log the error and hand back ``res``. """
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res

    def __updateReplicaToChecked(self, problematicDict):
        """ Set the replica status to 'Checked' in the catalog and resolve the entry. """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" % (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and
            resolves the CatalogPFNSizeMismatch prognosis
        """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info("CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if (catalogSize == bookkeepingSize):
            # Catalog and bookkeeping agree, so it is the storage copy that is wrong
            gLogger.info("CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info("CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID)
                return S_ERROR("Not removing catalog file mismatch since the only replica")
            else:
                gLogger.info("CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch', fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize == storageSize):
            gLogger.info("CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID, 'BKCatalogSizeMismatch')
        gLogger.info("CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID)
        return self.incrementProblematicRetry(fileID)

    def resolvePFNNotRegistered(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and
            resolves the PFNNotRegistered prognosis
        """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist', res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." % fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info("PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info("PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID)
            return self.incrementProblematicRetry(fileID)

        # HACK until we can obtain the space token descriptions through GFAL
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site

        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)

        problematicDict['PFN'] = res['Value']

        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info("PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID)
            return self.changeProblematicPrognosis(fileID, 'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    def resolveLFNCatalogMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and
            resolves the LFNCatalogMissing prognosis
        """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    def resolvePFNMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and
            resolves the PFNMissing prognosis
        """
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)

        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        # Look for a replica registered at the same site as the problematic SE
        seSite = se.split('_')[0].split('-')[0]
        found = False
        for replicaSE in replicas.keys():
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info("PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info("PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info("PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)

    def resolvePFNUnavailable(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and
            resolves the PFNUnavailable prognosis
        """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist', res['Message'])):
            # The file is no longer Unavailable but has now disappeared completely
            gLogger.info("PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info("PFNUnavailable replica (%d) found to still be Unavailable" % fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info("PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" % fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)

    def resolvePFNZeroSize(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and
            resolves the PFNZeroSize prognosis
        """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)

        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist', res['Message'])):
            gLogger.info("PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if not res['OK']:
            # Any other storage failure: count a retry like the other resolvers do,
            # instead of failing on the missing res['Value'] below
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))

            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info("PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')

        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info("PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" %
                         problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info("PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" %
                         problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)

    ############################################################################################

    def resolveLFNZeroReplicas(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and
            resolves the LFNZeroReplicas prognosis
        """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" % fileID)
        else:
            gLogger.info("LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue('Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if not res['OK']:
                    # The SE could not be checked, so a copy may still be there: do not go
                    # on to remove the catalog entry, count a retry instead
                    return self.__returnProblematicError(fileID, res)
                if lfn in res['Value']:
                    gLogger.info("LFNZeroReplicas file (%d) found storage file at %s" % (fileID, storageElementName))
                    self.__reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName, 'PFNNotRegistered')],
                        storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info("LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file', res['Message'])
                    # Increment the number of retries for this file; called on self like
                    # everywhere else in this class (it used to go through self.server)
                    self.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" % fileID)

        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)
class CatalogPlugInTestCase(unittest.TestCase):
  """ Shared fixture for the catalog plug-in tests.

  setUp creates a scratch directory with a couple of registered files in the
  catalog under test, tearDown removes it again. parseResult / parseError
  unwrap the per-path entry of a bulk catalog result while asserting its shape.
  """

  def setUp(self):
    """ Connect to the catalog, start from a clean directory and register the test files """
    self.fullMetadata = ['Status', 'ChecksumType', 'OwnerRole', 'CreationDate', 'Checksum',
                         'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size']
    self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
    self.fileMetadata = self.fullMetadata + ['NumberOfLinks']

    self.catalog = FileCatalog(catalogs=[catalogClientToTest])
    self.assertTrue(self.catalog.isOK())

    self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
    self.link = "%s/link" % self.destDir

    # Clean the existing directory
    self.cleanDirectory()
    self.parseResult(self.catalog.createDirectory(self.destDir), self.destDir)

    # Register some files to work with
    self.numberOfFiles = 2
    self.files = []
    for index in xrange(self.numberOfFiles):
      lfn = "%s/testFile_%d" % (self.destDir, index)
      self.assertTrue(self.registerFile(lfn))
      self.files.append(lfn)

  def registerFile(self, lfn):
    """ Add :lfn: to the catalog with a dummy replica and return its 'Successful' entry """
    guid = makeGuid()
    fileDict = {
        lfn: {
            'PFN': 'protocol://host:port/storage/path%s' % lfn,
            'Size': 10000000,
            'SE': 'DIRAC-storage',
            'GUID': guid,
            'Checksum': stringAdler(guid),
        }
    }
    return self.parseResult(self.catalog.addFile(fileDict), lfn)

  def parseResult(self, res, path):
    """ Assert that :res: is OK and lists :path: as successful, then return that entry """
    self.assertTrue(res['OK'])
    value = res['Value']
    self.assertTrue(value)
    successful = value['Successful']
    self.assertTrue(successful)
    self.assertTrue(path in successful)
    return successful[path]

  def parseError(self, res, path):
    """ Assert that :res: is OK and lists :path: as failed, then return that entry """
    self.assertTrue(res['OK'])
    value = res['Value']
    self.assertTrue(value)
    failed = value['Failed']
    self.assertTrue(failed)
    self.assertTrue(path in failed)
    return failed[path]

  def cleanDirectory(self):
    """ Remove the test directory and the files it contains, if the directory exists """
    present = self.parseResult(self.catalog.exists(self.destDir), self.destDir)
    if not present:
      return
    listing = self.parseResult(self.catalog.listDirectory(self.destDir), self.destDir)
    toRemove = listing['Files'].keys()
    if toRemove:
      self.purgeFiles(toRemove)
    removed = self.parseResult(self.catalog.removeDirectory(self.destDir), self.destDir)
    self.assertTrue(removed)

  def purgeFiles(self, lfns):
    """ Remove every replica of each LFN in :lfns:, then the file itself """
    for lfn in lfns:
      replicas = self.parseResult(self.catalog.getReplicas(lfn, True), lfn)
      for se, pfn in replicas.items():
        outcome = self.catalog.removeReplica({lfn: {'PFN': pfn, 'SE': se}})
        self.parseResult(outcome, lfn)
      self.parseResult(self.catalog.removeFile(lfn), lfn)

  def tearDown(self):
    """ Leave the catalog without the test directory """
    self.cleanDirectory()
def main():
  """ List the files (and optionally the empty directories) found below a catalog directory.

  Command line switches:
    -D/--Days, -M/--Months, -Y/--Years : only match entries older than the
        given age (a month counts as 30 days, a year as 365 days)
    -w/--Wildcard : fnmatch pattern for file names, a leading '*' is prepended
    -b/--BaseDir  : directory to start from, default /[vo]/user/[initial]/[username]
    -e/--EmptyDirs: also write the list of empty directories

  The matching LFNs are written to '<baseDir with / replaced by ->.lfns' and the
  empty directories to the corresponding '.emptydirs' file in the current
  working directory. The function ends by calling DIRAC.exit.
  """
  days = 0
  months = 0
  years = 0
  wildcard = None
  baseDir = ''
  emptyDirsFlag = False
  Script.registerSwitch("D:", "Days=", "Match files older than number of days [%s]" % days)
  Script.registerSwitch("M:", "Months=", "Match files older than number of months [%s]" % months)
  Script.registerSwitch("Y:", "Years=", "Match files older than number of years [%s]" % years)
  Script.registerSwitch("w:", "Wildcard=", "Wildcard for matching filenames [All]")
  Script.registerSwitch("b:", "BaseDir=",
                        "Base directory to begin search (default /[vo]/user/[initial]/[username])")
  Script.registerSwitch("e", "EmptyDirs", "Create a list of empty directories")

  Script.parseCommandLine(ignoreErrors=False)

  for switch in Script.getUnprocessedSwitches():
    name, value = switch[0], switch[1]
    lowered = name.lower()
    # The age switches are case sensitive in their short form, the others are not
    if name == "D" or lowered == "days":
      days = int(value)
    if name == "M" or lowered == "months":
      months = int(value)
    if name == "Y" or lowered == "years":
      years = int(value)
    if lowered == "w" or lowered == "wildcard":
      wildcard = '*' + value
    if lowered == "b" or lowered == "basedir":
      baseDir = value
    if lowered == "e" or lowered == "emptydirs":
      emptyDirsFlag = True

  # Imported only after the command line has been parsed
  import DIRAC
  from DIRAC import gLogger
  from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getVOForGroup
  from DIRAC.Core.Security.ProxyInfo import getProxyInfo
  from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
  from datetime import datetime, timedelta
  import fnmatch

  fc = FileCatalog()

  def isOlderThan(cTimeStruct, days):
    """ Tell whether the datetime :cTimeStruct: lies more than :days: days before the current UTC time """
    return cTimeStruct < datetime.utcnow() - timedelta(days=days)

  # Metadata is only requested from the catalog when an age cut has to be applied
  withMetadata = bool(days or months or years)
  totalDays = 365 * years + 30 * months + days

  res = getProxyInfo(False, False)
  if not res['OK']:
    gLogger.error("Failed to get client proxy information.", res['Message'])
    DIRAC.exit(2)
  proxyInfo = res['Value']
  if proxyInfo['secondsLeft'] == 0:
    gLogger.error("Proxy expired")
    DIRAC.exit(2)
  username = proxyInfo['username']
  vo = ''
  if 'group' in proxyInfo:
    vo = getVOForGroup(proxyInfo['group'])
  if not baseDir:
    if not vo:
      gLogger.error('Could not determine VO')
      # NOTE(review): presumably showHelp terminates the script - confirm
      Script.showHelp()
    baseDir = '/%s/user/%s/%s' % (vo, username[0], username)

  baseDir = baseDir.rstrip('/')

  gLogger.notice('Will search for files in %s%s' %
                 (baseDir, (' matching %s' % wildcard) if wildcard else ''))
  activeDirs = [baseDir]

  allFiles = []
  emptyDirs = []

  # Depth-first walk: activeDirs is used as a stack
  while activeDirs:
    currentDir = activeDirs.pop()
    res = fc.listDirectory(currentDir, withMetadata, timeout=360)
    if not res['OK']:
      gLogger.error("Error retrieving directory contents", "%s %s" % (currentDir, res['Message']))
      continue
    if currentDir in res['Value']['Failed']:
      gLogger.error("Error retrieving directory contents",
                    "%s %s" % (currentDir, res['Value']['Failed'][currentDir]))
      continue

    dirContents = res['Value']['Successful'][currentDir]
    subdirs = dirContents['SubDirs']
    files = dirContents['Files']
    if not subdirs and not files:
      emptyDirs.append(currentDir)
      gLogger.notice('%s: empty directory' % currentDir)
      continue

    # Reverse order so that the stack yields the sub-directories in ascending order
    for subdir in sorted(subdirs, reverse=True):
      if (not withMetadata) or isOlderThan(subdirs[subdir]['CreationDate'], totalDays):
        activeDirs.append(subdir)
    # Iterate over a sorted copy of the keys, since rejected files are dropped from the dict
    for filename in sorted(files):
      oldEnough = (not withMetadata) or isOlderThan(files[filename]['MetaData']['CreationDate'], totalDays)
      if not (oldEnough and (wildcard is None or fnmatch.fnmatch(filename, wildcard))):
        files.pop(filename)
    allFiles += sorted(files)

    if files or subdirs:
      gLogger.notice("%s: %d files%s, %d sub-directories" %
                     (currentDir, len(files), ' matching' if withMetadata or wildcard else '', len(subdirs)))

  # Both output files share the same stem, derived from the base directory
  outputStem = baseDir.replace('/%s' % vo, '%s' % vo).replace('/', '-')

  outputFileName = '%s.lfns' % outputStem
  # The context manager closes the file even if a write fails
  with open(outputFileName, 'w') as outputFile:
    for lfn in sorted(allFiles):
      outputFile.write(lfn + '\n')
  gLogger.notice('%d matched files have been put in %s' % (len(allFiles), outputFileName))

  if emptyDirsFlag:
    outputFileName = '%s.emptydirs' % outputStem
    with open(outputFileName, 'w') as outputFile:
      for emptyDir in sorted(emptyDirs):
        outputFile.write(emptyDir + '\n')
    gLogger.notice('%d empty directories have been put in %s' % (len(emptyDirs), outputFileName))

  DIRAC.exit(0)
# Fall back to the user area /<vo>/user/<initial>/<username> when no base directory is set
if not baseDir:
  if not vo:
    gLogger.error( 'Could not determine VO' )
    Script.showHelp()
  baseDir = '/%s/user/%s/%s' % ( vo, username[0], username )

baseDir = baseDir.rstrip( '/' )
gLogger.info( 'Will search for files in %s' % baseDir )

activeDirs = [baseDir]
allFiles = []
emptyDirs = []

# activeDirs is consumed as a stack until no directory is left to list
while activeDirs:
  currentDir = activeDirs.pop()
  res = fc.listDirectory( currentDir, withMetadata, timeout = 360 )
  if not res['OK']:
    gLogger.error( "Error retrieving directory contents",
                   "%s %s" % ( currentDir, res['Message'] ) )
  elif currentDir in res['Value']['Failed']:
    gLogger.error( "Error retrieving directory contents",
                   "%s %s" % ( currentDir, res['Value']['Failed'][currentDir] ) )
  else:
    dirContents = res['Value']['Successful'][currentDir]
    subdirs = dirContents['SubDirs']
    files = dirContents['Files']
    if subdirs or files:
      # Push in reverse order so that sub-directories are popped in ascending order
      for subdir in sorted( subdirs, reverse = True ):
        if withMetadata and not isOlderThan( subdirs[subdir]['CreationDate'], totalDays ):
          continue
        activeDirs.append( subdir )
    else:
      emptyDirs.append( currentDir )
      gLogger.notice( '%s: empty directory' % currentDir )
# NOTE(review): this fragment starts in the middle of an if/elif chain whose
# first branch is not visible here; presumably mct is the configuration
# extension that selects the particle name - confirm against the full script.
elif mct in ['pn', 'ps']:
  mcname = 'proton'
else:
  gLogger.error('Uknown config extension: ', mct)
  DIRAC.exit(2)

gLogger.notice('Working with prodName ', prodName)

from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
cat = FileCatalog()

# Top directory of the production for the selected particle type
BASE_PROD_DIR = '/vo.cta.in2p3.fr/MC/PROD2/'
topMCDir = os.path.join(BASE_PROD_DIR, prodName, 'prod-2_13112014_corsika', mcname)
res = cat.listDirectory(topMCDir)

# Per-tag counters; the tag is the text after the last '_' of a sub-directory name
NB_FILES_DIR = {}
TOTAL_SIZE_DIR = {}

gLogger.notice('Looking for Data files...')
# Only the first (and presumably only) successful entry is used. Indexing the
# result of .values() requires Python 2, where it is a list.
subdirs = res['Value']['Successful'].values()[0]['SubDirs'].keys()
for adir in subdirs:
  tag = adir.split('/')[-1].split('_')[-1]
  # Trailing comma: Python 2 print statement without a newline
  print tag,
  NB_FILES_DIR[tag] = 0
  TOTAL_SIZE_DIR[tag] = 0
  subres = cat.listDirectory(os.path.join(adir, 'Data'))
  # Sum the file counts of every sub-directory found below <adir>/Data
  for xxx in subres['Value']['Successful'].values()[0]['SubDirs'].keys():
    sizeDir = cat.getDirectorySize(xxx)['Value']['Successful']
    for key, val in sizeDir.items():
      print val['Files'],
      NB_FILES_DIR[tag] += val['Files']
class DIRACBackend(GridBackend):
    """Grid backend using the DIRAC file catalogue and the GFAL command line tools `gfal-*`.

    Catalogue operations go through the DIRAC `FileCatalog`, `DataManager` and
    `Dirac` clients. Operations on storage elements are delegated to the
    `gfal-*` and `dirac-dms-*` command line tools via `sh`.
    """

    def __init__(self, **kwargs):
        """Initialise DIRAC, check the proxy and prepare the command line tools.

        Raises `BackendException` if the user proxy information cannot be retrieved.
        """
        GridBackend.__init__(self, catalogue_prefix='', **kwargs)

        from DIRAC.Core.Base import Script
        Script.initialize()
        from DIRAC.FrameworkSystem.Client.ProxyManagerClient import ProxyManagerClient
        self.pm = ProxyManagerClient()

        proxy = self.pm.getUserProxiesInfo()
        if not proxy['OK']:
            raise BackendException("Proxy error.")

        from DIRAC.Interfaces.API.Dirac import Dirac
        self.dirac = Dirac()

        from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
        self.fc = FileCatalog()

        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()

        self._xattr_cmd = sh.Command('gfal-xattr').bake(_tty_out=False)
        self._replica_checksum_cmd = sh.Command('gfal-sum').bake(_tty_out=False)
        self._bringonline_cmd = sh.Command('gfal-legacy-bringonline').bake(_tty_out=False)
        self._cp_cmd = sh.Command('gfal-copy').bake(_tty_out=False)
        self._ls_se_cmd = sh.Command('gfal-ls').bake(color='never', _tty_out=False)
        self._move_cmd = sh.Command('gfal-rename').bake(_tty_out=False)
        self._mkdir_cmd = sh.Command('gfal-mkdir').bake(_tty_out=False)
        self._replicate_cmd = sh.Command('dirac-dms-replicate-lfn').bake(_tty_out=False)
        self._add_cmd = sh.Command('dirac-dms-add-file').bake(_tty_out=False)

    @staticmethod
    def _check_return_value(ret):
        """Raise the matching exception if the DIRAC result `ret` reports a failure.

        Raises `BackendException` if the call itself failed or a path failed for
        another reason, `DoesNotExistException` if a path does not exist.
        """
        if not ret['OK']:
            raise BackendException("Failed: %s" % (ret['Message'],))
        for error in ret['Value']['Failed'].values():
            if ('No such' in error) or ('Directory does not' in error):
                raise DoesNotExistException("No such file or directory.")
            raise BackendException(error)

    @staticmethod
    def _raise_sh_error(e):
        """Translate the failed command `e` (an `sh.ErrorReturnCode`) into a backend exception."""
        if 'No such file' in e.stderr:
            raise DoesNotExistException("No such file or directory.")
        # Some tools report their errors on stdout only
        if len(e.stderr) == 0:
            raise BackendException(e.stdout)
        raise BackendException(e.stderr)

    def _is_dir(self, lurl):
        """Return the catalogue's `isDirectory` answer for `lurl`."""
        isdir = self.fc.isDirectory(lurl)
        self._check_return_value(isdir)
        return isdir['Value']['Successful'][lurl]

    def _is_file(self, lurl):
        """Return the catalogue's `isFile` answer for `lurl`."""
        isfile = self.fc.isFile(lurl)
        self._check_return_value(isfile)
        return isfile['Value']['Successful'][lurl]

    def _get_dir_entry(self, lurl, infodict=None):
        """Take a lurl and return a DirEntry.

        `infodict` is the catalogue listing entry of `lurl`; its 'MetaData'
        item is used if present. Without it, the metadata is requested from the
        catalogue, first as a file and then as a directory.
        """
        # If no dictionary with the information is specified, get it from the catalogue
        try:
            md = infodict['MetaData']
        except TypeError:
            md = self.fc.getFileMetadata(lurl)
            if not md['OK']:
                raise BackendException("Failed to list path '%s': %s" % (lurl, md['Message']))
            for error in list(md['Value']['Failed'].values()):
                if 'No such file' not in error:
                    raise BackendException(error)
                # File does not exist, maybe a directory?
                md = self.fc.getDirectoryMetadata(lurl)
                if not md['OK']:
                    raise BackendException("Failed to list path '%s': %s" % (lurl, md['Message']))
                if md['Value']['Failed']:
                    raise DoesNotExistException("No such file or directory.")
                break
            md = md['Value']['Successful'][lurl]
        return DirEntry(posixpath.basename(lurl),
                        mode=oct(md.get('Mode', -1)),
                        links=md.get('links', -1),
                        gid=md['OwnerGroup'],
                        uid=md['Owner'],
                        size=md.get('Size', -1),
                        modified=str(md.get('ModificationDate', '?')))

    def _iter_directory(self, lurl):
        """Iterate over entries in a directory.

        Yields `(path, infodict)` tuples sorted by path. If `lurl` is a file,
        the single tuple `(lurl, None)` is yielded instead.
        """
        ret = self.fc.listDirectory(lurl)
        if not ret['OK']:
            raise BackendException("Failed to list path '%s': %s" % (lurl, ret['Message']))
        for error in ret['Value']['Failed'].values():
            if 'Directory does not' not in error:
                raise BackendException(error)
            # Dir does not exist, maybe a File?
            # `_is_file` unwraps the catalogue answer; the raw result dict is always truthy
            if self._is_file(lurl):
                lst = [(lurl, None)]
                break
            raise DoesNotExistException("No such file or Directory.")
        else:
            # Sort items by keys, i.e. paths
            contents = ret['Value']['Successful'][lurl]
            lst = sorted(list(contents['Files'].items()) + list(contents['SubDirs'].items()),
                         key=lambda item: item[0])

        for item in lst:
            yield item  # = path, dict

    def _ls(self, lurl, **kwargs):
        """Yield the DirEntry of `lurl` itself (`directory=True`) or of each entry in it."""
        # Translate keyword arguments
        d = kwargs.pop('directory', False)

        if d:
            # Just the requested entry itself
            yield self._get_dir_entry(lurl)
            return

        for path, info in self._iter_directory(lurl):
            yield self._get_dir_entry(path, info)

    def _ls_se(self, surl, **kwargs):
        """Yield a DirEntry for every line that `gfal-ls -l` prints for `surl`."""
        # Translate keyword arguments
        d = kwargs.pop('directory', False)
        args = []
        if d:
            args.append('-d')
        args.append('-l')
        args.append(surl)
        try:
            output = self._ls_se_cmd(*args, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or Directory.")
            else:
                raise BackendException(e.stderr)
        for line in output:
            fields = line.split()
            mode, links, gid, uid, size = fields[:5]
            name = fields[-1]
            # Everything between the size and the name is the modification time
            modified = ' '.join(fields[5:-1])
            yield DirEntry(name, mode=mode, links=int(links), gid=gid, uid=uid, size=int(size), modified=modified)

    def _replicas(self, lurl, **kwargs):
        """Return the replicas of `lurl` as reported by DIRAC."""
        # Check the lurl actually exists
        # `_ls` is a generator: it has to be advanced for the check to run at all
        next(self._ls(lurl, directory=True))

        rep = self.dirac.getReplicas(lurl)
        self._check_return_value(rep)
        rep = rep['Value']['Successful'][lurl]

        return list(rep.values())

    def _exists(self, surl, **kwargs):
        """Return `True` if `surl` exists on the storage element and is not a directory."""
        try:
            ret = self._ls_se_cmd(surl, '-d', '-l', **kwargs).strip()
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                return False
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)
        else:
            return ret[0] != 'd'  # Return `False` for directories

    def _register(self, surl, lurl, verbose=False, **kwargs):
        """Register the existing physical copy `surl` under `lurl`.

        A new catalogue file is created if `lurl` does not exist yet, otherwise
        a replica is added to it.
        """
        # Register an existing physical copy in the file catalogue
        se = storage.get_SE(surl).name
        # See if file already exists in DFC
        ret = self.fc.getFileMetadata(lurl)
        try:
            self._check_return_value(ret)
        except DoesNotExistException:
            # Add new file
            size = next(self._ls_se(surl, directory=True)).size
            checksum = self.checksum(surl)
            guid = str(uuid.uuid4())  # The guid does not seem to be important. Make it unique if possible.
            ret = self.dm.registerFile((lurl, surl, size, se, guid, checksum))
        else:
            # Add new replica
            ret = self.dm.registerReplica((lurl, surl, se))

        self._check_return_value(ret)
        if verbose:
            print_("Successfully registered replica %s of %s from %s."%(surl, lurl, se))
        return True

    def _deregister(self, surl, lurl, verbose=False, **kwargs):
        """Remove the replica of `lurl` at the storage element of `surl` from the catalogue."""
        # DIRAC only needs to know the SE name to deregister a replica
        se = storage.get_SE(surl).name
        ret = self.dm.removeReplicaFromCatalog(se, [lurl])
        self._check_return_value(ret)
        if verbose:
            print_("Successfully deregistered replica of %s from %s."%(lurl, se))
        return True

    def _state(self, surl, **kwargs):
        """Return the `user.status` attribute of `surl`, or '?' if it cannot be read."""
        try:
            state = self._xattr_cmd(surl, 'user.status', **kwargs).strip()
        except sh.ErrorReturnCode as e:
            if "No such file" in e.stderr:
                raise DoesNotExistException("No such file or Directory.")
            state = '?'
        except sh.SignalException_SIGSEGV:
            state = '?'
        return state

    def _checksum(self, surl, **kwargs):
        """Return the ADLER32 checksum of `surl`, or '?' if it cannot be determined."""
        try:
            checksum = self._replica_checksum_cmd(surl, 'ADLER32', **kwargs).split()[1]
        except (sh.ErrorReturnCode, sh.SignalException_SIGSEGV, IndexError):
            checksum = '?'
        return checksum

    def _bringonline(self, surl, timeout, verbose=False, **kwargs):
        """Request `surl` to be brought online and poll until it is, or `timeout` seconds passed.

        Returns `True` if the replica came online in time, `False` otherwise.
        """
        if verbose:
            out = sys.stdout
        else:
            out = None
        # gfal does not notice when files come online, it seems
        # Just send a single short request, then check regularly
        end = time.time() + timeout

        try:
            self._bringonline_cmd('-t', 10, surl, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            # The command fails if the file is not online
            # To be expected after 10 seconds
            if "No such file" in e.stderr:
                # Except when the file does not actually exist on the tape storage
                raise DoesNotExistException("No such file or Directory.")

        wait = 5
        while True:
            if verbose:
                print_("Checking replica state...")
            if self.is_online(surl):
                if verbose:
                    print_("Replica brought online.")
                return True

            time_left = end - time.time()
            if time_left <= 0:
                if verbose:
                    print_("Could not bring replica online.")
                return False

            # Double the polling interval each round, but never sleep past the deadline
            wait *= 2
            if time_left < wait:
                wait = time_left

            if verbose:
                print_("Timeout remaining: %d s"%(time_left))
                print_("Checking again in: %d s"%(wait))
            time.sleep(wait)

    def _replicate(self, source_surl, destination_surl, lurl, verbose=False, **kwargs):
        """Replicate `lurl` from the SE of `source_surl` to the SE of `destination_surl`."""
        if verbose:
            out = sys.stdout
        else:
            out = None

        source = storage.get_SE(source_surl).name
        destination = storage.get_SE(destination_surl).name
        try:
            self._replicate_cmd(lurl, destination, source, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            self._raise_sh_error(e)

        return True

    def _get(self, surl, localpath, verbose=False, **kwargs):
        """Copy `surl` to `localpath`; return whether `localpath` is a file afterwards."""
        if verbose:
            out = sys.stdout
        else:
            out = None
        try:
            self._cp_cmd('-f', '--checksum', 'ADLER32', surl, localpath, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            self._raise_sh_error(e)
        return os.path.isfile(localpath)

    def _put(self, localpath, surl, lurl, verbose=False, **kwargs):
        """Upload `localpath` to the SE of `surl` and register it as `lurl`."""
        if verbose:
            out = sys.stdout
        else:
            out = None
        se = storage.get_SE(surl).name

        try:
            self._add_cmd(lurl, localpath, se, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            self._raise_sh_error(e)
        return True

    def _remove(self, surl, lurl, last=False, verbose=False, **kwargs):
        """Remove the replica of `lurl` at the SE of `surl`, or the whole file if `last` is set."""
        se = storage.get_SE(surl).name

        if last:
            # Delete lfn
            if verbose:
                print_("Removing all replicas of %s."%(lurl,))
            ret = self.dm.removeFile([lurl])
        else:
            if verbose:
                print_("Removing replica of %s from %s."%(lurl, se))
            ret = self.dm.removeReplica(se, [lurl])

        if not ret['OK']:
            raise BackendException('Failed: %s'%(ret['Message']))

        for error in ret['Value']['Failed'].values():
            if 'No such file' in error:
                raise DoesNotExistException("No such file or directory.")
            raise BackendException(error)

        return True

    def _rmdir(self, lurl, verbose=False):
        """Remove the an empty directory from the catalogue."""
        rep = self.fc.removeDirectory(lurl)
        self._check_return_value(rep)
        return True

    def _move_replica(self, surl, new_surl, verbose=False, **kwargs):
        """Rename `surl` to `new_surl` on storage, creating the target folder first."""
        if verbose:
            out = sys.stdout
        else:
            out = None

        try:
            folder = posixpath.dirname(new_surl)
            self._mkdir_cmd(folder, '-p', _out=out, **kwargs)
            self._move_cmd(surl, new_surl, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            self._raise_sh_error(e)
        return True
class StorageUsageAgent(AgentModule): ''' .. class:: StorageUsageAgent :param FileCatalog catalog: FileCatalog instance :parma mixed storageUsage: StorageUsageDB instance or its rpc client :param int pollingTime: polling time :param int activePeriod: active period on weeks :param threading.Lock dataLock: data lock :param threading.Lock replicaListLock: replica list lock :param DictCache proxyCache: creds cache ''' catalog = None storageUsage = None pollingTime = 43200 activePeriod = 0 dataLock = None # threading.Lock() replicaListLock = None # threading.Lock() proxyCache = None # DictCache() enableStartupSleep = True # Enable a random sleep so not all the user agents start together def __init__(self, *args, **kwargs): ''' c'tor ''' AgentModule.__init__(self, *args, **kwargs) self.__baseDir = '/lhcb' self.__baseDirLabel = "_".join(List.fromChar(self.__baseDir, "/")) self.__ignoreDirsList = [] self.__keepDirLevels = 4 self.__startExecutionTime = long(time.time()) self.__dirExplorer = DirectoryExplorer(reverse=True) self.__processedDirs = 0 self.__directoryOwners = {} self.catalog = FileCatalog() self.__maxToPublish = self.am_getOption('MaxDirectories', 5000) if self.am_getOption('DirectDB', False): self.storageUsage = StorageUsageDB() else: # Set a timeout of 0.1 seconds per directory (factor 5 margin) self.storageUsage = RPCClient('DataManagement/StorageUsage', timeout=self.am_getOption( 'Timeout', int(self.__maxToPublish * 0.1))) self.activePeriod = self.am_getOption('ActivePeriod', self.activePeriod) self.dataLock = threading.Lock() self.replicaListLock = threading.Lock() self.proxyCache = DictCache(removeProxy) self.__noProxy = set() self.__catalogType = None self.__recalculateUsage = Operations().getValue( 'DataManagement/RecalculateDirSize', False) self.enableStartupSleep = self.am_getOption('EnableStartupSleep', self.enableStartupSleep) self.__publishDirQueue = {} self.__dirsToPublish = {} self.__replicaFilesUsed = set() self.__replicaListFilesDir = "" def 
initialize(self): ''' agent initialisation ''' self.am_setOption("PollingTime", self.pollingTime) if self.enableStartupSleep: rndSleep = random.randint(1, self.pollingTime) self.log.info("Sleeping for %s seconds" % rndSleep) time.sleep(rndSleep) # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configsorteduration can be used to change this default. self.am_setOption('shifterProxy', 'DataManager') return S_OK() def __writeReplicasListFiles(self, dirPathList): ''' dump replicas list to files ''' self.replicaListLock.acquire() try: self.log.info("Dumping replicas for %s dirs" % len(dirPathList)) result = self.catalog.getDirectoryReplicas(dirPathList) if not result['OK']: self.log.error("Could not get directory replicas", "%s -> %s" % (dirPathList, result['Message'])) return result resData = result['Value'] filesOpened = {} for dirPath in dirPathList: if dirPath in result['Value']['Failed']: self.log.error( "Could not get directory replicas", "%s -> %s" % (dirPath, resData['Failed'][dirPath])) continue dirData = resData['Successful'][dirPath] for lfn in dirData: for seName in dirData[lfn]: if seName not in filesOpened: filePath = os.path.join( self.__replicaListFilesDir, "replicas.%s.%s.filling" % (seName, self.__baseDirLabel)) # Check if file is opened and if not open it if seName not in filesOpened: if seName not in self.__replicaFilesUsed: self.__replicaFilesUsed.add(seName) filesOpened[seName] = file(filePath, "w") else: filesOpened[seName] = file(filePath, "a") # seName file is opened. 
Write filesOpened[seName].write("%s -> %s\n" % (lfn, dirData[lfn][seName])) # Close the files for seName in filesOpened: filesOpened[seName].close() return S_OK() finally: self.replicaListLock.release() def __resetReplicaListFiles(self): ''' prepare directories for replica list files ''' self.__replicaFilesUsed = set() self.__replicaListFilesDir = os.path.join( self.am_getOption("WorkDirectory"), "replicaLists") mkDir(self.__replicaListFilesDir) self.log.info("Replica Lists directory is %s" % self.__replicaListFilesDir) def __replicaListFilesDone(self): ''' rotate replicas list files ''' self.replicaListLock.acquire() try: old = re.compile(r"^replicas\.([a-zA-Z0-9\-_]*)\.%s\.old$" % self.__baseDirLabel) current = re.compile(r"^replicas\.([a-zA-Z0-9\-_]*)\.%s$" % self.__baseDirLabel) filling = re.compile( r"^replicas\.([a-zA-Z0-9\-_]*)\.%s\.filling$" % self.__baseDirLabel) # Delete old for fileName in os.listdir(self.__replicaListFilesDir): match = old.match(fileName) if match: os.unlink( os.path.join(self.__replicaListFilesDir, fileName)) # Current -> old for fileName in os.listdir(self.__replicaListFilesDir): match = current.match(fileName) if match: newFileName = "replicas.%s.%s.old" % (match.group(1), self.__baseDirLabel) self.log.info( "Moving \n %s\n to \n %s" % (os.path.join(self.__replicaListFilesDir, fileName), os.path.join(self.__replicaListFilesDir, newFileName))) os.rename( os.path.join(self.__replicaListFilesDir, fileName), os.path.join(self.__replicaListFilesDir, newFileName)) # filling to current for fileName in os.listdir(self.__replicaListFilesDir): match = filling.match(fileName) if match: newFileName = "replicas.%s.%s" % (match.group(1), self.__baseDirLabel) self.log.info( "Moving \n %s\n to \n %s" % (os.path.join(self.__replicaListFilesDir, fileName), os.path.join(self.__replicaListFilesDir, newFileName))) os.rename( os.path.join(self.__replicaListFilesDir, fileName), os.path.join(self.__replicaListFilesDir, newFileName)) return S_OK() finally: 
self.replicaListLock.release()


def __printSummary(self):
    ''' Log the per-storage-element usage summary and push it to gMonitor.

        Nothing is logged or marked when the summary query does not succeed.
    '''
    res = self.storageUsage.getStorageSummary()
    if res['OK']:
        self.log.notice("Storage Usage Summary")
        self.log.notice(
            "============================================================")
        self.log.notice(
            "%-40s %20s %20s" % ('Storage Element', 'Number of files',
                                 'Total size'))
        for se in sorted(res['Value']):
            # Activities are grouped by the part of the SE name that
            # precedes the first '_' or '-'
            site = se.split('_')[0].split('-')[0]
            gMonitor.registerActivity("%s-used" % se, "%s usage" % se,
                                      "StorageUsage/%s usage" % site, "",
                                      gMonitor.OP_MEAN, bucketLength=600)
            gMonitor.registerActivity("%s-files" % se, "%s files" % se,
                                      "StorageUsage/%s files" % site, "Files",
                                      gMonitor.OP_MEAN, bucketLength=600)
        time.sleep(2)
        for se in sorted(res['Value']):
            usage = res['Value'][se]['Size']
            files = res['Value'][se]['Files']
            self.log.notice("%-40s %20s %20s" % (se, str(files), str(usage)))
            gMonitor.addMark("%s-used" % se, usage)
            gMonitor.addMark("%s-files" % se, files)


def execute(self):
    ''' Run one agent cycle.

        Resets the per-cycle state, explores the directory tree below the
        ``BaseDirectory`` option in batches of ``iterMaxDirs`` directories,
        publishes what is still queued and, when ``PurgeOutdatedRecords`` is
        set, purges outdated records.

        :return: S_OK(), or the failed result of the purge call
    '''
    self.__publishDirQueue = {}
    self.__dirsToPublish = {}
    self.__baseDir = self.am_getOption('BaseDirectory', '/lhcb')
    self.__baseDirLabel = "_".join(List.fromChar(self.__baseDir, "/"))
    self.__ignoreDirsList = self.am_getOption('Ignore', [])
    self.__keepDirLevels = self.am_getOption("KeepDirLevels", 4)
    self.__startExecutionTime = long(time.time())
    self.__dirExplorer = DirectoryExplorer(reverse=True)
    self.__resetReplicaListFiles()
    self.__noProxy = set()
    self.__processedDirs = 0
    self.__directoryOwners = {}
    self.__printSummary()
    self.__dirExplorer.addDir(self.__baseDir)
    self.log.notice("Initiating with %s as base directory." % self.__baseDir)
    # Loop over all the directories and sub-directories
    totalIterTime = 0.0
    numIterations = 0.0
    iterMaxDirs = 100
    while self.__dirExplorer.isActive():
        startT = time.time()
        d2E = [
            self.__dirExplorer.getNextDir() for _i in xrange(iterMaxDirs)
            if self.__dirExplorer.isActive()
        ]
        self.__exploreDirList(d2E)
        iterTime = time.time() - startT
        totalIterTime += iterTime
        numIterations += len(d2E)
        self.log.verbose("Query took %.2f seconds for %s dirs" %
                         (iterTime, len(d2E)))
    # Guard against a cycle in which no directory was explored, and print
    # the average with two decimals (the format used to read '%2.f')
    if numIterations:
        self.log.verbose("Average query time: %.2f secs/dir" %
                         (totalIterTime / numIterations))
    # Publish remaining directories
    self.__publishData(background=False)
    # Move replica list files
    self.__replicaListFilesDone()
    # Clean records older than 1 day
    self.log.info("Finished recursive directory search.")
    if self.am_getOption("PurgeOutdatedRecords", True):
        elapsedTime = time.time() - self.__startExecutionTime
        # At least one day, or twice the longer of polling time and the
        # duration of this cycle
        outdatedSeconds = max(
            max(self.am_getOption("PollingTime"), elapsedTime) * 2, 86400)
        result = self.storageUsage.purgeOutdatedEntries(
            self.__baseDir, long(outdatedSeconds), self.__ignoreDirsList)
        if not result['OK']:
            return result
        self.log.notice("Purged %s outdated records" % result['Value'])
    return S_OK()


def __exploreDirList(self, dirList):
    ''' Collect content and size for every directory in :dirList: and hand
        each one to the processing methods.

        :param list dirList: directory paths to explore
    '''
    # Normalise dirList first
    dirList = [os.path.realpath(d) for d in dirList]
    self.log.notice("Retrieving info for %s dirs" % len(dirList))
    # For top directories, no files anyway, hence no need to get full size
    dirContents = {}
    failed = {}
    successfull = {}
    startTime = time.time()
    nbDirs = len(dirList)
    chunkSize = 10
    if self.__catalogType == 'DFC' or dirList == [self.__baseDir]:
        # Get the content of the directory as anyway this is needed
        for dirChunk in breakListIntoChunks(dirList, chunkSize):
            res = self.catalog.listDirectory(dirChunk, True, timeout=600)
            if not res['OK']:
                failed.update(dict.fromkeys(dirChunk, res['Message']))
            else:
                failed.update(res['Value']['Failed'])
                dirContents.update(res['Value']['Successful'])
        self.log.info(
            'Time to retrieve content of %d directories: %.1f seconds' %
            (nbDirs, time.time() - startTime))
        # Filter instead of list.remove(): a failed path that is not in
        # dirList must not raise ValueError
        dirList = [d for d in dirList if d not in failed]
    # We don't need to get the storage usage if there are no files...
    dirListSize = [d for d in dirList if dirContents.get(d, {}).get('Files')]
    startTime1 = time.time()
    # __recalculateUsage enables to recompute the directory usage in case
    # the internal table is wrong
    for dirChunk in breakListIntoChunks(dirListSize, chunkSize):
        res = self.catalog.getDirectorySize(dirChunk, True,
                                            self.__recalculateUsage,
                                            timeout=600)
        if not res['OK']:
            failed.update(dict.fromkeys(dirChunk, res['Message']))
        else:
            failed.update(res['Value']['Failed'])
            successfull.update(res['Value']['Successful'])
    # Group the failed directories by error message to log each reason once
    errorReason = {}
    for dirPath in failed:
        errorReason.setdefault(str(failed[dirPath]), []).append(dirPath)
    for error in errorReason:
        self.log.error(
            'Failed to get directory info', '- %s for:\n\t%s' %
            (error, '\n\t'.join(errorReason[error])))
    self.log.info('Time to retrieve size of %d directories: %.1f seconds' %
                  (len(dirListSize), time.time() - startTime1))
    for dirPath in [d for d in dirList if d not in failed]:
        metadata = successfull.get(dirPath, {})
        if 'SubDirs' in metadata:
            self.__processDir(dirPath, metadata)
        else:
            if not self.__catalogType:
                self.log.info('Catalog type determined to be DFC')
                self.__catalogType = 'DFC'
            # .get(): a directory without listing is reported by
            # __processDirDFC instead of raising KeyError here
            self.__processDirDFC(dirPath, metadata,
                                 dirContents.get(dirPath, {}))
    self.log.info('Time to process %d directories: %.1f seconds' %
                  (nbDirs, time.time() - startTime))
    notCommited = len(self.__publishDirQueue) + len(self.__dirsToPublish)
    self.log.notice(
        "%d dirs to be explored, %d done. %d not yet committed." %
        (self.__dirExplorer.getNumRemainingDirs(), self.__processedDirs,
         notCommited))


def __processDirDFC(self, dirPath, metadata, subDirectories):
    ''' gets the list of subdirs that the DFC doesn't return, set the metadata
        like the FC and then call the same method as for the FC

        :param str dirPath: directory being processed
        :param dict metadata: size information of :dirPath:
        :param dict subDirectories: listing of :dirPath:, must hold 'SubDirs'
        :return: result of __processDir, or None when 'SubDirs' is missing
    '''
    if 'SubDirs' not in subDirectories:
        self.log.error('No subdirectory item for directory', dirPath)
        return
    dirMetadata = {
        'Files': 0,
        'TotalSize': 0,
        'ClosedDirs': [],
        'SiteUsage': {}
    }
    if 'PhysicalSize' in metadata:
        dirMetadata['Files'] = metadata['LogicalFiles']
        dirMetadata['TotalSize'] = metadata['LogicalSize']
        dirMetadata['SiteUsage'] = metadata['PhysicalSize'].copy()
        dirMetadata['SiteUsage'].pop('TotalFiles', None)
        dirMetadata['SiteUsage'].pop('TotalSize', None)
    subDirs = subDirectories['SubDirs'].copy()
    dirMetadata['SubDirs'] = subDirs
    dirUsage = dirMetadata['SiteUsage']
    errorReason = {}
    for subDir in subDirs:
        self.__directoryOwners.setdefault(
            subDir, (subDirs[subDir]['Owner'], subDirs[subDir]['OwnerGroup']))
        subDirs[subDir] = subDirs[subDir].get('CreationTime', dateTime())
        if not dirUsage:
            continue
        # This part here is for removing the recursivity introduced by the DFC
        args = [subDir]
        if len(subDir.split('/')) > self.__keepDirLevels:
            args += [True, self.__recalculateUsage]
        result = self.catalog.getDirectorySize(*args)
        if not result['OK']:
            errorReason.setdefault(str(result['Message']), []).append(subDir)
            continue
        # A dedicated name keeps the :metadata: argument intact, so a failed
        # sub-directory is never subtracted using stale values
        subMetadata = result['Value']['Successful'].get(subDir)
        if not subMetadata:
            reason = result['Value']['Failed'].get(
                subDir, 'No size information returned')
            errorReason.setdefault(str(reason), []).append(subDir)
            continue
        dirMetadata['Files'] -= subMetadata['LogicalFiles']
        dirMetadata['TotalSize'] -= subMetadata['LogicalSize']
        if 'PhysicalSize' in subMetadata:
            seUsage = subMetadata['PhysicalSize']
            seUsage.pop('TotalFiles', None)
            seUsage.pop('TotalSize', None)
            for se in seUsage:
                if se not in dirUsage:
                    self.log.error('SE used in subdir but not in dir', se)
                else:
                    dirUsage[se]['Files'] -= seUsage[se]['Files']
                    dirUsage[se]['Size'] -= seUsage[se]['Size']
    for error in errorReason:
        self.log.error(
            'Failed to get directory info', '- %s for:\n\t%s' %
            (error, '\n\t'.join(errorReason[error])))
    # Iterate over a snapshot because entries are removed inside the loop
    for se, usage in list(dirUsage.items()):
        # Both info should be 0 or #0
        if not usage['Files'] and not usage['Size']:
            dirUsage.pop(se)
        elif not usage['Files'] * usage['Size']:
            self.log.error('Directory inconsistent',
                           '%s @ %s: %s' % (dirPath, se, str(usage)))
    return self.__processDir(dirPath, dirMetadata)


def __processDir(self, dirPath, dirMetadata):
    ''' calculate nb of files and size of :dirPath:, remove it if it's empty

        :param str dirPath: directory being processed
        :param dict dirMetadata: holds 'SubDirs', 'ClosedDirs', 'Files',
                                 'TotalSize' and 'SiteUsage'
    '''
    subDirs = dirMetadata['SubDirs']
    closedDirs = dirMetadata['ClosedDirs']
    ##############################
    # FIXME: Until we understand while closed dirs are not working...
    ##############################
    closedDirs = []
    prStr = "%s: found %s sub-directories" % (dirPath, len(subDirs)
                                              if subDirs else 'no')
    if closedDirs:
        prStr += ", %s are closed (ignored)" % len(closedDirs)
    for rmDir in closedDirs + self.__ignoreDirsList:
        subDirs.pop(rmDir, None)
    numberOfFiles = long(dirMetadata['Files'])
    totalSize = long(dirMetadata['TotalSize'])
    if numberOfFiles:
        prStr += " and %s files (%s bytes)" % (numberOfFiles, totalSize)
    else:
        prStr += " and no files"
    self.log.notice(prStr)
    if closedDirs:
        self.log.verbose("Closed dirs:\n %s" % '\n'.join(closedDirs))
    siteUsage = dirMetadata['SiteUsage']
    if numberOfFiles > 0:
        dirData = {
            'Files': numberOfFiles,
            'TotalSize': totalSize,
            'SEUsage': siteUsage
        }
        self.__addDirToPublishQueue(dirPath, dirData)
        # Print statistics
        self.log.verbose(
            "%-40s %20s %20s" % ('Storage Element', 'Number of files',
                                 'Total size'))
        for storageElement in sorted(siteUsage):
            usageDict = siteUsage[storageElement]
            self.log.verbose(
                "%-40s %20s %20s" % (storageElement, str(
                    usageDict['Files']), str(usageDict['Size'])))
    # If it's empty delete it
    elif len(subDirs) == 0 and len(closedDirs) == 0:
        if dirPath != self.__baseDir:
            self.removeEmptyDir(dirPath)
        return
    # We don't need the cached information about owner
    self.__directoryOwners.pop(dirPath, None)
    rightNow = dateTime()
    # Without an active period every sub-directory is explored
    chosenDirs = [
        subDir for subDir in subDirs
        if not self.activePeriod or timeInterval(
            subDirs[subDir], self.activePeriod * week).includes(rightNow)
    ]
    self.__dirExplorer.addDirList(chosenDirs)
    self.__processedDirs += 1


def __getOwnerProxy(self, dirPath):
    ''' get owner creds for :dirPath:

        :param str dirPath: directory whose owner proxy is wanted
        :return: S_OK(proxy file) or an error result
    '''
    self.log.verbose("Retrieving dir metadata...")
    # get owner form the cached information, if not, try getDirectoryMetadata
    ownerName, ownerGroup = self.__directoryOwners.pop(
        dirPath, (None, None))
    if not ownerName or not ownerGroup:
        result = returnSingleResult(
            self.catalog.getDirectoryMetadata(dirPath))
        if not result['OK']:
            self.log.error("Could not get metadata info", result['Message'])
            return result
        if 'OwnerRole' not in result['Value']:
            # An OK result has no 'Message' and must not be handed back as
            # if it carried a proxy file: build a proper error instead
            errStr = "No OwnerRole in metadata of %s" % dirPath
            self.log.error("Could not get metadata info", errStr)
            return S_ERROR(errStr)
        ownerRole = result['Value']['OwnerRole']
        ownerDN = result['Value']['OwnerDN']
        if ownerRole[0] != "/":
            ownerRole = "/%s" % ownerRole
        cacheKey = (ownerDN, ownerRole)
        ownerName = 'unknown'
        byGroup = False
    else:
        ownerDN = Registry.getDNForUsername(ownerName)
        if not ownerDN['OK']:
            self.log.error("Could not get DN from user name",
                           ownerDN['Message'])
            return ownerDN
        ownerDN = ownerDN['Value'][0]
        # This bloody method returns directly a string!!!!
        ownerRole = Registry.getVOMSAttributeForGroup(ownerGroup)
        byGroup = True
    # Get all groups for that VOMS Role, and add lhcb_user as in DFC this is
    # a safe value
    ownerGroups = Registry.getGroupsWithVOMSAttribute(ownerRole) + [
        'lhcb_user'
    ]
    downErrors = []
    for ownerGroup in ownerGroups:
        if byGroup:
            ownerRole = None
            cacheKey = (ownerDN, ownerGroup)
        if cacheKey in self.__noProxy:
            return S_ERROR("Proxy not available")
        # Getting the proxy...
        upFile = self.proxyCache.get(cacheKey, 3600)
        if upFile and os.path.exists(upFile):
            self.log.verbose(
                'Returning cached proxy for %s %s@%s [%s] in %s' %
                (ownerName, ownerDN, ownerGroup, ownerRole, upFile))
            return S_OK(upFile)
        if ownerRole:
            result = gProxyManager.downloadVOMSProxy(
                ownerDN, ownerGroup, limited=False,
                requiredVOMSAttribute=ownerRole)
        else:
            result = gProxyManager.downloadProxy(ownerDN, ownerGroup,
                                                 limited=False)
        if not result['OK']:
            downErrors.append("%s : %s" % (cacheKey, result['Message']))
            continue
        userProxy = result['Value']
        secsLeft = max(0, userProxy.getRemainingSecs()['Value'])
        upFile = userProxy.dumpAllToFile()
        if upFile['OK']:
            upFile = upFile['Value']
        else:
            return upFile
        self.proxyCache.add(cacheKey, secsLeft, upFile)
        self.log.info("Got proxy for %s %s@%s [%s]" %
                      (ownerName, ownerDN, ownerGroup, ownerRole))
        return S_OK(upFile)
    # Remember the failure so that the download is not attempted again
    self.__noProxy.add(cacheKey)
    return S_ERROR("Could not download proxy for user (%s, %s):\n%s " %
                   (ownerDN, ownerRole, "\n ".join(downErrors)))


def removeEmptyDir(self, dirPath):
    ''' Remove the empty directory :dirPath:, first without and then, if
        that fails, with the owner proxy.

        :return: result of the last removal attempt
    '''
    self.log.notice("Deleting empty directory %s" % dirPath)
    for useOwnerProxy in (False, True):
        result = self.__removeEmptyDir(dirPath, useOwnerProxy=useOwnerProxy)
        if result['OK']:
            self.log.info(
                "Successfully removed empty directory from File Catalog and StorageUsageDB"
            )
            break
    return result


def __removeEmptyDir(self, dirPath, useOwnerProxy=True):
    ''' unlink empty folder :dirPath:

        Directories with fewer path levels than KeepDirLevels are kept and
        reported as success.

        :param bool useOwnerProxy: run the removal with the owner proxy set in
                                   the X509_USER_PROXY environment variable
        :return: S_OK() or the first error result met
    '''
    from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData
    if len(List.fromChar(dirPath, "/")) < self.__keepDirLevels:
        return S_OK()
    prevProxyEnv = None
    if useOwnerProxy:
        result = self.__getOwnerProxy(dirPath)
        if not result['OK']:
            if 'Proxy not available' not in result['Message']:
                self.log.error(result['Message'])
            return result
        upFile = result['Value']
        # .get(): the variable may be unset in the agent environment
        prevProxyEnv = os.environ.get('X509_USER_PROXY')
        os.environ['X509_USER_PROXY'] = upFile
    try:
        gConfigurationData.setOptionInCFG(
            '/DIRAC/Security/UseServerCertificate', 'false')
        # res = self.catalog.removeDirectory( dirPath )
        res = self.catalog.writeCatalogs[0][1].removeDirectory(dirPath)
        if not res['OK']:
            self.log.error(
                "Error removing empty directory from File Catalog.",
                res['Message'])
            return res
        elif dirPath in res['Value']['Failed']:
            self.log.error(
                "Failed to remove empty directory from File Catalog.",
                res['Value']['Failed'][dirPath])
            self.log.debug(str(res))
            return S_ERROR(res['Value']['Failed'][dirPath])
        res = self.storageUsage.removeDirectory(dirPath)
        if not res['OK']:
            self.log.error(
                "Failed to remove empty directory from Storage Usage database.",
                res['Message'])
            return res
        return S_OK()
    finally:
        gConfigurationData.setOptionInCFG(
            '/DIRAC/Security/UseServerCertificate', 'true')
        if useOwnerProxy:
            # Put the environment back exactly as it was found
            if prevProxyEnv is None:
                os.environ.pop('X509_USER_PROXY', None)
            else:
                os.environ['X509_USER_PROXY'] = prevProxyEnv


def __addDirToPublishQueue(self, dirName, dirData):
    ''' enqueue :dirName: and :dirData: for publishing

        A background publication is started each time the queue length
        reaches a multiple of the PublishClusterSize option.
    '''
    self.__publishDirQueue[dirName] = dirData
    numDirsToPublish = len(self.__publishDirQueue)
    if numDirsToPublish and numDirsToPublish % self.am_getOption(
            "PublishClusterSize", 100) == 0:
        self.__publishData(background=True)


def __publishData(self, background=True):
    ''' publish data in a separate deamon thread

        :param bool background: when False the publication runs in the
                                calling thread
    '''
    self.dataLock.acquire()
    try:
        # Dump to file
        if self.am_getOption("DumpReplicasToFile", False):
            pass
            # repThread = threading.Thread( target = self.__writeReplicasListFiles,
            #                               args = ( list( self.__publishDirQueue ), ) )
        self.__dirsToPublish.update(self.__publishDirQueue)
        self.__publishDirQueue = {}
    finally:
        self.dataLock.release()
    if background:
        pubThread = threading.Thread(target=self.__executePublishData)
        pubThread.setDaemon(1)
        pubThread.start()
    else:
        self.__executePublishData()


def __executePublishData(self):
    ''' publication thread target

        Publishes at most __maxToPublish directories per call; directories
        that could not be published stay queued.

        :return: result of the publication, None when nothing was queued
    '''
    self.dataLock.acquire()
    try:
        if not self.__dirsToPublish:
            self.log.info("No data to be published")
            return
        if len(self.__dirsToPublish) > self.__maxToPublish:
            toPublish = {}
            for dirName in sorted(
                    self.__dirsToPublish)[:self.__maxToPublish]:
                toPublish[dirName] = self.__dirsToPublish.pop(dirName)
        else:
            toPublish = self.__dirsToPublish
        self.log.info("Publishing usage for %d directories" % len(toPublish))
        res = self.storageUsage.publishDirectories(toPublish)
        if res['OK']:
            # All is OK, reset the dictionary, even if data member!
            toPublish.clear()
        else:
            # Put back dirs to be published, due to the error
            self.__dirsToPublish.update(toPublish)
            self.log.error("Failed to publish directories", res['Message'])
        return res
    finally:
        self.dataLock.release()
if not baseDir: if not vo: gLogger.error('Could not determine VO') Script.showHelp() baseDir = '/%s/user/%s/%s' % (vo, username[0], username) baseDir = baseDir.rstrip('/') gLogger.info('Will search for files in %s' % baseDir) activeDirs = [baseDir] allFiles = [] emptyDirs = [] while len(activeDirs) > 0: currentDir = activeDirs.pop() res = fc.listDirectory(currentDir, withMetadata) if not res['OK']: gLogger.error("Error retrieving directory contents", "%s %s" % (currentDir, res['Message'])) elif currentDir in res['Value']['Failed']: gLogger.error( "Error retrieving directory contents", "%s %s" % (currentDir, res['Value']['Failed'][currentDir])) else: dirContents = res['Value']['Successful'][currentDir] subdirs = dirContents['SubDirs'] files = dirContents['Files'] if not subdirs and not files: emptyDirs.append(currentDir) gLogger.notice('%s: empty directory' % currentDir) else:
# Script fragment: walk the catalog below baseDir and record the empty
# directories found on the way.

# Without an explicit base directory use /<vo>/user/<initial>/<username>
if not baseDir:
    if not vo:
        gLogger.error( 'Could not determine VO' )
        # NOTE(review): presumably showHelp() ends the script - confirm
        Script.showHelp()
    baseDir = '/%s/user/%s/%s' % ( vo, username[0], username )
baseDir = baseDir.rstrip( '/' )
gLogger.info( 'Will search for files in %s' % baseDir )
# activeDirs is used as a stack: pop() takes the directory added last
activeDirs = [baseDir]
allFiles = []
emptyDirs = []
while len( activeDirs ) > 0:
    currentDir = activeDirs.pop()
    res = fc.listDirectory( currentDir, withMetadata )
    if not res['OK']:
        # The whole call failed: log it and go on with the next directory
        gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Message'] ) )
    elif currentDir in res['Value']['Failed']:
        gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Value']['Failed'][currentDir] ) )
    else:
        dirContents = res['Value']['Successful'][currentDir]
        subdirs = dirContents['SubDirs']
        files = dirContents['Files']
        if not subdirs and not files:
            emptyDirs.append( currentDir )
            gLogger.notice( '%s: empty directory' % currentDir )
        else:
            # Pushed in reverse order so that pop() returns them in ascending order
            for subdir in sorted( subdirs, reverse = True ):
                # Without metadata every sub-directory is followed, otherwise
                # only those accepted by isOlderThan( CreationDate, totalDays )
                if ( not withMetadata ) or isOlderThan( subdirs[subdir]['CreationDate'], totalDays ):
                    activeDirs.append( subdir )
class CatalogPlugInTestCase(unittest.TestCase):
    """ Base class for the CatalogPlugin test case

        setUp creates a scratch directory holding ``numberOfFiles`` registered
        files in the catalog under test; tearDown removes it again.
    """

    def setUp(self):
        """ Connect to the catalog and populate a clean test directory """
        self.fullMetadata = ['Status', 'CheckSumType', 'OwnerRole',
                             'CreationDate', 'Checksum', 'ModificationDate',
                             'OwnerDN', 'Mode', 'GUID', 'Size']
        self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
        self.fileMetadata = self.fullMetadata + ['NumberOfLinks']

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        """ Register :lfn: with dummy replica information

            :return: the catalog's 'Successful' entry for :lfn:
        """
        pfn = 'protocol://host:port/storage/path%s' % lfn
        size = 10000000
        se = 'DIRAC-storage'
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {'PFN': pfn, 'Size': size, 'SE': se, 'GUID': guid,
                         'Checksum': adler}
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        """ Assert that :res: is OK and lists :path: as successful

            :return: res['Value']['Successful'][path]
        """
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Successful'])
        self.assertIn(path, res['Value']['Successful'])
        return res['Value']['Successful'][path]

    def parseError(self, res, path):
        """ Assert that :res: is OK and lists :path: as failed

            :return: res['Value']['Failed'][path]
        """
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Failed'])
        self.assertIn(path, res['Value']['Failed'])
        return res['Value']['Failed'][path]

    def cleanDirectory(self):
        """ Remove the test directory and the files it holds, if it exists """
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = list(returnValue['Files'])
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        """ Remove every replica of each LFN in :lfns:, then the file itself """
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {'PFN': pfn, 'SE': se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        """ Leave the catalog without the test directory """
        self.cleanDirectory()
if not baseDir: if not vo: gLogger.error( 'Could not determine VO' ) Script.showHelp() baseDir = '/%s/user/%s/%s' % ( vo, username[0], username ) baseDir = baseDir.rstrip('/') gLogger.info( 'Will search for files in %s' % baseDir ) activeDirs = [baseDir] allFiles = [] emptyDirs = [] while len( activeDirs ) > 0: currentDir = activeDirs[0] res = fc.listDirectory( currentDir, verbose ) activeDirs.remove( currentDir ) if not res['OK']: gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Message'] ) ) elif res['Value']['Failed'].has_key( currentDir ): gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Value']['Failed'][currentDir] ) ) else: dirContents = res['Value']['Successful'][currentDir] subdirs = dirContents['SubDirs'] empty = True for subdir, metadata in subdirs.items(): if ( not verbose ) or isOlderThan( metadata['CreationDate'], totalDays ): activeDirs.append( subdir ) empty = False for filename, fileInfo in dirContents['Files'].items(): metadata = fileInfo['MetaData']
class DataIntegrityClient( Client ):
    """ Client of the DataManagement/DataIntegrity service.

        The following methods are supported in the service but are not mentioned explicitly here:

            getProblematic()
               Obtains a problematic file from the IntegrityDB based on the LastUpdate time

            getPrognosisProblematics(prognosis)
               Obtains all the problematics of a particular prognosis from the integrityDB

            getProblematicsSummary()
               Obtains a count of the number of problematics for each prognosis found

            getDistinctPrognosis()
               Obtains the distinct prognosis found in the integrityDB

            getTransformationProblematics(prodID)
               Obtains the problematics for a given production

            incrementProblematicRetry(fileID)
               Increments the retry count for the supplied file ID

            changeProblematicPrognosis(fileID,newPrognosis)
               Changes the prognosis of the supplied file to the new prognosis

            setProblematicStatus(fileID,status)
               Updates the status of a problematic in the integrityDB

            removeProblematic(self,fileID)
               This removes the specified file ID from the integrity DB

            insertProblematic(sourceComponent,fileMetadata)
               Inserts file with supplied metadata into the integrity DB
    """

    def __init__( self, **kwargs ):
        """ Set the service to contact and create the data manager and catalog clients """
        Client.__init__( self, **kwargs )
        self.setServer( 'DataManagement/DataIntegrity' )
        self.dm = DataManager()
        self.fc = FileCatalog()

    ##########################################################################
    #
    # This section contains the specific methods for LFC->SE checks
    #

    def catalogDirectoryToSE( self, lfnDir ):
        """ This obtains the replica and metadata information from the catalog
            for the supplied directory and checks against the storage elements.

            :param lfnDir: a directory LFN or a list of them
            :return: S_OK( {'CatalogMetadata':..., 'CatalogReplicas':...} ) or an error result
        """
        gLogger.info( "-" * 40 )
        gLogger.info( "Performing the LFC->SE check" )
        gLogger.info( "-" * 40 )
        if isinstance( lfnDir, types.StringTypes ):
            lfnDir = [lfnDir]
        res = self.__getCatalogDirectoryContents( lfnDir )
        if not res['OK']:
            return res
        replicas = res['Value']['Replicas']
        catalogMetadata = res['Value']['Metadata']
        res = self.__checkPhysicalFiles( replicas, catalogMetadata )
        if not res['OK']:
            return res
        resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
        return S_OK( resDict )

    def catalogFileToSE( self, lfns ):
        """ This obtains the replica and metadata information from the catalog
            and checks against the storage elements.

            :param lfns: an LFN or a list of them
            :return: S_OK( {'CatalogMetadata':..., 'CatalogReplicas':...} ) or an error result
        """
        gLogger.info( "-" * 40 )
        gLogger.info( "Performing the LFC->SE check" )
        gLogger.info( "-" * 40 )
        if isinstance( lfns, types.StringTypes ):
            lfns = [lfns]
        res = self.__getCatalogMetadata( lfns )
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        res = self.__getCatalogReplicas( list( catalogMetadata ) )
        if not res['OK']:
            return res
        replicas = res['Value']
        res = self.__checkPhysicalFiles( replicas, catalogMetadata )
        if not res['OK']:
            return res
        resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
        return S_OK( resDict )

    def checkPhysicalFiles( self, replicas, catalogMetadata, ses = None ):
        """ This takes the supplied replica and metadata information obtained
            from the catalog and checks against the storage elements.

            :param dict replicas: {lfn: {se: pfn}}
            :param dict catalogMetadata: {lfn: metadata holding 'Size'}
            :param list ses: when given and not empty, only these storage elements are checked
        """
        gLogger.info( "-" * 40 )
        gLogger.info( "Performing the LFC->SE check" )
        gLogger.info( "-" * 40 )
        return self.__checkPhysicalFiles( replicas, catalogMetadata, ses = ses )

    def __checkPhysicalFiles( self, replicas, catalogMetadata, ses = None ):
        """ This obtains the physical file metadata and checks the metadata
            against the catalog entries

            Replicas whose non-zero storage size differs from the catalog size
            are reported as 'CatalogPFNSizeMismatch'.
        """
        sePfns = {}
        pfnLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, pfn in replicaDict.items():
                # An empty or missing :ses: means no restriction
                if ses and se not in ses:
                    continue
                sePfns.setdefault( se, [] ).append( pfn )
                pfnLfns[pfn] = lfn
        gLogger.info( '%s %s' % ( 'Storage Element'.ljust( 20 ), 'Replicas'.rjust( 20 ) ) )
        for site in sortList( sePfns.keys() ):
            files = len( sePfns[site] )
            gLogger.info( '%s %s' % ( site.ljust( 20 ), str( files ).rjust( 20 ) ) )

        for se in sortList( sePfns.keys() ):
            pfnDict = {}
            for pfn in sePfns[se]:
                pfnDict[pfn] = pfnLfns[pfn]
            sizeMismatch = []
            res = self.__checkPhysicalFileMetadata( pfnDict, se )
            if not res['OK']:
                gLogger.error( 'Failed to get physical file metadata.', res['Message'] )
                return res
            for pfn, metadata in res['Value'].items():
                lfn = pfnLfns[pfn]
                if lfn in catalogMetadata:
                    # Zero-size replicas are already reported by __checkPhysicalFileMetadata
                    if ( metadata['Size'] != catalogMetadata[lfn]['Size'] ) and ( metadata['Size'] != 0 ):
                        sizeMismatch.append( ( lfn, pfn, se, 'CatalogPFNSizeMismatch' ) )
            if sizeMismatch:
                self.__reportProblematicReplicas( sizeMismatch, se, 'CatalogPFNSizeMismatch' )
        return S_OK()

    def __checkPhysicalFileMetadata( self, pfnLfns, se ):
        """ Obtain the physical file metadata and check the files are available

            Missing, lost, unavailable and zero-size replicas are reported.

            :param dict pfnLfns: {pfn: lfn}
            :param str se: storage element name
            :return: S_OK( {pfn: metadata} ) for the PFNs whose metadata was obtained
        """
        gLogger.info( 'Checking the integrity of %s physical files at %s' % ( len( pfnLfns ), se ) )

        res = StorageElement( se ).getFileMetadata( list( pfnLfns ) )
        if not res['OK']:
            gLogger.error( 'Failed to get metadata for pfns.', res['Message'] )
            return res
        pfnMetadataDict = res['Value']['Successful']
        # If the replicas are completely missing
        missingReplicas = []
        for pfn, reason in res['Value']['Failed'].items():
            if re.search( 'File does not exist', reason ):
                missingReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNMissing' ) )
        if missingReplicas:
            self.__reportProblematicReplicas( missingReplicas, se, 'PFNMissing' )
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for pfn, pfnMetadata in pfnMetadataDict.items():
            if pfnMetadata['Lost']:
                lostReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNLost' ) )
            if pfnMetadata['Unavailable']:
                unavailableReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNUnavailable' ) )
            if pfnMetadata['Size'] == 0:
                zeroSizeReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNZeroSize' ) )
        if lostReplicas:
            self.__reportProblematicReplicas( lostReplicas, se, 'PFNLost' )
        if unavailableReplicas:
            self.__reportProblematicReplicas( unavailableReplicas, se, 'PFNUnavailable' )
        if zeroSizeReplicas:
            self.__reportProblematicReplicas( zeroSizeReplicas, se, 'PFNZeroSize' )
        gLogger.info( 'Checking the integrity of physical files at %s complete' % se )
        return S_OK( pfnMetadataDict )

    ##########################################################################
    #
    # This section contains the specific methods for SE->LFC checks
    #

    def storageDirectoryToCatalog( self, lfnDir, storageElement ):
        """ This obtains the file found on the storage element in the supplied
            directories and determines whether they exist in the catalog and
            checks their metadata elements

            :param lfnDir: a directory LFN or a list of them
            :param str storageElement: storage element name
        """
        gLogger.info( "-" * 40 )
        gLogger.info( "Performing the SE->LFC check at %s" % storageElement )
        gLogger.info( "-" * 40 )
        if isinstance( lfnDir, types.StringTypes ):
            lfnDir = [lfnDir]
        res = self.__getStorageDirectoryContents( lfnDir, storageElement )
        if not res['OK']:
            return res
        storageFileMetadata = res['Value']
        if storageFileMetadata:
            return self.__checkCatalogForSEFiles( storageFileMetadata, storageElement )
        return S_OK( {'CatalogMetadata':{}, 'StorageMetadata':{}} )

    def __checkCatalogForSEFiles( self, storageMetadata, storageElement ):
        """ Check that the storage files are registered in the catalog and that
            the sizes agree.

            :storageMetadata: is modified in place: it is keyed by PFN on entry
            and by LFN (with a 'PFN' item added) on return.

            :return: S_OK( {'CatalogMetadata':..., 'StorageMetadata':...} ) or an error result
        """
        gLogger.info( 'Checking %s storage files exist in the catalog' % len( storageMetadata ) )

        # RF_NOTE : this comment is completely wrong
        # First get all the PFNs as they should be registered in the catalog
        res = StorageElement( storageElement ).getPfnForProtocol( list( storageMetadata ), withPort = False )
        if not res['OK']:
            gLogger.error( "Failed to get registered PFNs for physical files", res['Message'] )
            return res
        for pfn, error in res['Value']['Failed'].items():
            gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
        if res['Value']['Failed']:
            return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
        for original, registered in res['Value']['Successful'].items():
            storageMetadata[registered] = storageMetadata.pop( original )
        # Determine whether these PFNs are registered and if so obtain the LFN
        res = self.fc.getLFNForPFN( list( storageMetadata ) )
        if not res['OK']:
            gLogger.error( "Failed to get registered LFNs for PFNs", res['Message'] )
            return res
        failedPfns = res['Value']['Failed']
        notRegisteredPfns = []
        # Snapshot: entries are popped from failedPfns inside the loop
        for pfn, error in list( failedPfns.items() ):
            if re.search( 'No such file or directory', error ):
                notRegisteredPfns.append( ( storageMetadata[pfn]['LFN'], pfn, storageElement, 'PFNNotRegistered' ) )
                failedPfns.pop( pfn )
        if notRegisteredPfns:
            self.__reportProblematicReplicas( notRegisteredPfns, storageElement, 'PFNNotRegistered' )
        if failedPfns:
            return S_ERROR( 'Failed to obtain LFNs for PFNs' )
        pfnLfns = res['Value']['Successful']
        # Re-key the storage metadata by LFN, dropping unregistered PFNs
        # (snapshot of the keys: the dictionary is modified inside the loop)
        for pfn in list( storageMetadata ):
            pfnMetadata = storageMetadata.pop( pfn )
            if pfn in pfnLfns:
                lfn = pfnLfns[pfn]
                storageMetadata[lfn] = pfnMetadata
                storageMetadata[lfn]['PFN'] = pfn
        # For the LFNs found to be registered obtain the file metadata from the
        # catalog and verify against the storage metadata
        res = self.__getCatalogMetadata( list( storageMetadata ) )
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        sizeMismatch = []
        for lfn, lfnCatalogMetadata in catalogMetadata.items():
            lfnStorageMetadata = storageMetadata[lfn]
            if ( lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size'] ) and ( lfnStorageMetadata['Size'] != 0 ):
                sizeMismatch.append( ( lfn, storageMetadata[lfn]['PFN'], storageElement, 'CatalogPFNSizeMismatch' ) )
        if sizeMismatch:
            self.__reportProblematicReplicas( sizeMismatch, storageElement, 'CatalogPFNSizeMismatch' )
        gLogger.info( 'Checking storage files exist in the catalog complete' )
        resDict = {'CatalogMetadata':catalogMetadata, 'StorageMetadata':storageMetadata}
        return S_OK( resDict )

    def getStorageDirectoryContents( self, lfnDir, storageElement ):
        """ This takes the supplied lfn directories and recursively obtains the
            files in the supplied storage element
        """
        return self.__getStorageDirectoryContents( lfnDir, storageElement )

    def __getStorageDirectoryContents( self, lfnDir, storageElement ):
        """ Obtains the contents of the supplied directory on the storage

            Zero-size files are reported as 'PFNZeroSize'.

            :param list lfnDir: directory LFNs
            :param str storageElement: storage element name
            :return: S_OK( {pfn: metadata with 'LFN' added} ) or an error result
        """
        gLogger.info( 'Obtaining the contents for %s directories at %s' % ( len( lfnDir ), storageElement ) )

        se = StorageElement( storageElement )
        res = se.getPfnForLfn( lfnDir )
        if not res['OK']:
            gLogger.error( "Failed to get PFNs for directories", res['Message'] )
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error( 'Failed to obtain directory PFN from LFNs', '%s %s' % ( directory, error ) )
        if res['Value']['Failed']:
            return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
        storageDirectories = list( res['Value']['Successful'].values() )
        res = se.exists( storageDirectories )
        if not res['OK']:
            gLogger.error( "Failed to obtain existance of directories", res['Message'] )
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error( 'Failed to determine existance of directory', '%s %s' % ( directory, error ) )
        if res['Value']['Failed']:
            return S_ERROR( 'Failed to determine existance of directory' )
        directoryExists = res['Value']['Successful']
        activeDirs = [directory for directory in sortList( directoryExists.keys() )
                      if directoryExists[directory]]
        allFiles = {}
        while activeDirs:
            currentDir = activeDirs.pop( 0 )
            res = se.listDirectory( currentDir )
            if not res['OK']:
                gLogger.error( 'Failed to get directory contents', res['Message'] )
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
                return S_ERROR( res['Value']['Failed'][currentDir] )
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend( dirContents['SubDirs'] )
                fileMetadata = dirContents['Files']

                # RF_NOTE This ugly trick is needed because se.getPfnPath does not follow the Successful/Failed convention
                # res = { "Successful" : {}, "Failed" : {} }
                # for pfn in fileMetadata:
                #   inRes = se.getPfnPath( pfn )
                #   if inRes["OK"]:
                #     res["Successful"][pfn] = inRes["Value"]
                #   else:
                #     res["Failed"][pfn] = inRes["Message"]
                res = se.getLfnForPfn( list( fileMetadata ) )
                if not res['OK']:
                    gLogger.error( 'Failed to get directory content LFNs', res['Message'] )
                    return res
                for pfn, error in res['Value']['Failed'].items():
                    gLogger.error( "Failed to get LFN for PFN", "%s %s" % ( pfn, error ) )
                if res['Value']['Failed']:
                    return S_ERROR( "Failed to get LFNs for PFNs" )
                pfnLfns = res['Value']['Successful']
                for pfn, lfn in pfnLfns.items():
                    fileMetadata[pfn]['LFN'] = lfn
                allFiles.update( fileMetadata )
        zeroSizeFiles = []
        lostFiles = []
        unavailableFiles = []
        for pfn in sortList( allFiles.keys() ):
            # 'dirac_directory' entries are not reported as files
            if os.path.basename( pfn ) == 'dirac_directory':
                allFiles.pop( pfn )
            else:
                metadata = allFiles[pfn]
                if metadata['Size'] == 0:
                    zeroSizeFiles.append( ( metadata['LFN'], pfn, storageElement, 'PFNZeroSize' ) )
                # if metadata['Lost']:
                #   lostFiles.append((metadata['LFN'],pfn,storageElement,'PFNLost'))
                # if metadata['Unavailable']:
                #   unavailableFiles.append((metadata['LFN'],pfn,storageElement,'PFNUnavailable'))
        if zeroSizeFiles:
            self.__reportProblematicReplicas( zeroSizeFiles, storageElement, 'PFNZeroSize' )
        if lostFiles:
            self.__reportProblematicReplicas( lostFiles, storageElement, 'PFNLost' )
        if unavailableFiles:
            self.__reportProblematicReplicas( unavailableFiles, storageElement, 'PFNUnavailable' )
        gLogger.info( 'Obtained at total of %s files for directories at %s' % ( len( allFiles ), storageElement ) )
        return S_OK( allFiles )

    def __getStoragePathExists( self, lfnPaths, storageElement ):
        """ Determine which of the supplied LFNs exist on the storage element

            :return: S_OK( {lfn: pfn} ) holding only the existing paths, or an error result
        """
        gLogger.info( 'Determining the existance of %d files at %s' % ( len( lfnPaths ), storageElement ) )

        se = StorageElement( storageElement )
        res = se.getPfnForLfn( lfnPaths )
        if not res['OK']:
            gLogger.error( "Failed to get PFNs for LFNs", res['Message'] )
            return res
        for lfnPath, error in res['Value']['Failed'].items():
            gLogger.error( 'Failed to obtain PFN from LFN', '%s %s' % ( lfnPath, error ) )
        if res['Value']['Failed']:
            return S_ERROR( 'Failed to obtain PFNs from LFNs' )
        lfnPfns = res['Value']['Successful']
        # Reverse mapping used to translate the answer back to LFNs
        pfnLfns = {}
        for lfn, pfn in lfnPfns.items():
            pfnLfns[pfn] = lfn

        res = se.exists( pfnLfns )
        if not res['OK']:
            gLogger.error( "Failed to obtain existance of paths", res['Message'] )
            return res
        for lfnPath, error in res['Value']['Failed'].items():
            gLogger.error( 'Failed to determine existance of path', '%s %s' % ( lfnPath, error ) )
        if res['Value']['Failed']:
            return S_ERROR( 'Failed to determine existance of paths' )
        pathExists = res['Value']['Successful']
        resDict = {}
        for pfn, exists in pathExists.items():
            if exists:
                resDict[pfnLfns[pfn]] = pfn
        return S_OK( resDict )

    ##########################################################################
    #
    # This section contains the specific methods for obtaining replica and metadata information from the catalog
    #

    def __getCatalogDirectoryContents( self, lfnDir ):
        """ Obtain the contents of the supplied directory

            Sub-directories are followed. Files without replicas or with zero
            size are reported.

            :param list lfnDir: directory LFNs, left untouched
            :return: S_OK( {'Metadata': {lfn: metadata}, 'Replicas': {lfn: {se: pfn}}} ) or an error result
        """
        gLogger.info( 'Obtaining the catalog contents for %s directories' % len( lfnDir ) )

        # Work on a copy: the queue is consumed and must not empty the caller's list
        activeDirs = list( lfnDir )
        allFiles = {}
        while activeDirs:
            currentDir = activeDirs.pop( 0 )
            res = self.fc.listDirectory( currentDir )
            if not res['OK']:
                gLogger.error( 'Failed to get directory contents', res['Message'] )
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend( dirContents['SubDirs'] )
                allFiles.update( dirContents['Files'] )

        zeroReplicaFiles = []
        zeroSizeFiles = []
        allReplicaDict = {}
        allMetadataDict = {}
        for lfn, lfnDict in allFiles.items():
            lfnReplicas = {}
            for se, replicaDict in lfnDict['Replicas'].items():
                lfnReplicas[se] = replicaDict['PFN']
            if not lfnReplicas:
                zeroReplicaFiles.append( lfn )
            allReplicaDict[lfn] = lfnReplicas
            allMetadataDict[lfn] = lfnDict['MetaData']
            if lfnDict['MetaData']['Size'] == 0:
                zeroSizeFiles.append( lfn )
        if zeroReplicaFiles:
            self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
        if zeroSizeFiles:
            self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
        gLogger.info( 'Obtained at total of %s files for the supplied directories' % len( allMetadataDict ) )
        resDict = {'Metadata':allMetadataDict, 'Replicas':allReplicaDict}
        return S_OK( resDict )

    def __getCatalogReplicas( self, lfns ):
        """ Obtain the file replicas from the catalog while checking that there are replicas

            :return: S_OK( 'Successful' part of the catalog answer ) or an error result
        """
        gLogger.info( 'Obtaining the replicas for %s files' % len( lfns ) )

        zeroReplicaFiles = []
        res = self.fc.getReplicas( lfns, allStatus = True )
        if not res['OK']:
            gLogger.error( 'Failed to get catalog replicas', res['Message'] )
            return res
        allReplicas = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search( 'File has zero replicas', error ):
                zeroReplicaFiles.append( lfn )
        if zeroReplicaFiles:
            self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
        gLogger.info( 'Obtaining the replicas for files complete' )
        return S_OK( allReplicas )
def __getCatalogMetadata(self, lfns):
    """Fetch catalog metadata for ``lfns`` and report the problematic ones.

    LFNs whose lookup fails with 'No such file or directory' are reported as
    'LFNCatalogMissing'; LFNs whose catalog 'Size' is 0 are reported as
    'LFNZeroSize'.

    :param lfns: LFNs to look up; an empty value short-circuits to S_OK({})
    :return: S_OK({lfn: metadata}) for the successful lookups, or the failed
             result of ``self.fc.getFileMetadata``
    """
    if not lfns:
        return S_OK({})
    gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns))
    res = self.fc.getFileMetadata(lfns)
    if not res['OK']:
        gLogger.error('Failed to get catalog metadata', res['Message'])
        return res
    allMetadata = res['Value']['Successful']
    missingCatalogFiles = [lfn for lfn, error in res['Value']['Failed'].items()
                           if re.search('No such file or directory', error)]
    if missingCatalogFiles:
        self.__reportProblematicFiles(missingCatalogFiles, 'LFNCatalogMissing')
    zeroSizeFiles = [lfn for lfn, metadata in allMetadata.items()
                     if metadata['Size'] == 0]
    if zeroSizeFiles:
        self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
    gLogger.info('Obtaining the catalog metadata complete')
    return S_OK(allMetadata)


##########################################################################
#
# This section contains the methods for inserting problematic files into the integrity DB
#

def __reportProblematicFiles(self, lfns, reason):
    """Log ``lfns`` and register them through setFileProblematic.

    Best effort: a failed registration is only logged and nothing is returned.

    :param lfns: list of LFNs to report
    :param reason: prognosis recorded for every LFN
    """
    gLogger.info('The following %s files were found with %s' % (len(lfns), reason))
    for lfn in sortList(lfns):
        gLogger.info(lfn)
    res = self.setFileProblematic(lfns, reason, sourceComponent='DataIntegrityClient')
    if not res['OK']:
        # A failed DB update is an error condition, not routine information.
        gLogger.error('Failed to update integrity DB with files', res['Message'])
    else:
        gLogger.info('Successfully updated integrity DB with files')


def setFileProblematic(self, lfn, reason, sourceComponent=''):
    """Insert one or more files into the integrity DB as problematic.

    Only ``self.insertProblematic`` is called here; no catalog status is
    modified by this method.

    :param lfn: a single LFN (str) or a list of LFNs
    :param reason: prognosis stored with each file; should reflect the
                   problem observed
    :param sourceComponent: name of the component issuing the request
    :return: the result of ``self.insertProblematic``, or S_ERROR when
             ``lfn`` is neither a list nor a string
    """
    if isinstance(lfn, list):
        lfns = lfn
    elif isinstance(lfn, str):
        lfns = [lfn]
    else:
        errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
        gLogger.error(errStr)
        return S_ERROR(errStr)
    gLogger.info("DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len(lfns))
    # File-level entries carry no replica information, hence empty PFN and SE.
    fileMetadata = {}
    for fileLfn in lfns:
        fileMetadata[fileLfn] = {'Prognosis': reason, 'LFN': fileLfn, 'PFN': '', 'SE': ''}
    res = self.insertProblematic(sourceComponent, fileMetadata)
    if not res['OK']:
        gLogger.error("DataIntegrityClient.setFileProblematic: Failed to insert problematics to integrity DB")
    return res


def __reportProblematicReplicas(self, replicaTuple, se, reason):
    """Log the supplied replicas and register them through setReplicaProblematic.

    Best effort: a failed registration is only logged and nothing is returned.

    :param replicaTuple: list of (lfn, pfn, se, reason) tuples
    :param se: storage element name, used for the log header only
    :param reason: prognosis, used for the log header only (each tuple
                   carries its own)
    """
    gLogger.info('The following %s files had %s at %s' % (len(replicaTuple), reason, se))
    # The SE and reason inside each tuple are not needed for logging.
    for lfn, pfn, _replicaSE, _replicaReason in sortList(replicaTuple):
        if lfn:
            gLogger.info(lfn)
        else:
            gLogger.info(pfn)
    res = self.setReplicaProblematic(replicaTuple, sourceComponent='DataIntegrityClient')
    if not res['OK']:
        gLogger.error('Failed to update integrity DB with replicas', res['Message'])
    else:
        gLogger.info('Successfully updated integrity DB with replicas')


def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
    """Insert replicas into the integrity DB and flag them 'Problematic' in the catalog.

    :param replicaTuple: a single (lfn, pfn, se, prognosis) tuple or a list
                         of such tuples, where
                         lfn - the LFN of the file
                         pfn - the PFN if available (otherwise '')
                         se - the storage element of the problematic replica (otherwise '')
                         prognosis - the problem observed, stored in the integrity DB
    :param sourceComponent: name of the component issuing the request
    :return: S_OK({'Successful': ..., 'Failed': ...}) taken from
             ``self.fc.setReplicaStatus``, or the first failed result
    """
    if isinstance(replicaTuple, tuple):
        replicaTuple = [replicaTuple]
    elif not isinstance(replicaTuple, list):
        errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
        gLogger.error(errStr)
        return S_ERROR(errStr)
    gLogger.info("DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len(replicaTuple))
    # NOTE(review): keyed by LFN, so two replicas of the same LFN collapse to
    # the last one supplied -- confirm this is intended.
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
        replicaDict[lfn] = {'Prognosis': reason, 'LFN': lfn, 'PFN': pfn, 'SE': se}
    res = self.insertProblematic(sourceComponent, replicaDict)
    if not res['OK']:
        gLogger.error("DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB")
        return res
    for replicaInfo in replicaDict.values():
        replicaInfo['Status'] = 'Problematic'
    res = self.fc.setReplicaStatus(replicaDict)
    if not res['OK']:
        errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
        gLogger.error(errStr, res['Message'])
        return res
    resDict = {'Successful': res['Value']['Successful'], 'Failed': res['Value']['Failed']}
    return S_OK(resDict)


##########################################################################
#
# This section contains the resolution methods for various prognoses
#

def __updateCompletedFiles(self, prognosis, fileID):
    """Log that ``fileID`` is resolved and set its problematic status to 'Resolved'.

    :return: the result of ``self.setProblematicStatus``
    """
    gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
    return self.setProblematicStatus(fileID, 'Resolved')


def __returnProblematicError(self, fileID, res):
    """Increment the retry counter of ``fileID``, log ``res['Message']`` and return ``res``."""
    self.incrementProblematicRetry(fileID)
    gLogger.error(res['Message'])
    return res


def __getRegisteredPFNLFN(self, pfn, storageElement):
    """Resolve the LFN registered in the catalog for a physical file.

    :param pfn: physical file name
    :param storageElement: name of the storage element holding ``pfn``
    :return: S_OK(lfn), S_OK(False) when the catalog answers
             'No such file or directory', or a failed result otherwise
    """
    res = StorageElement(storageElement).getPfnForProtocol(pfn, withPort=False)
    if not res['OK']:
        gLogger.error("Failed to get registered PFN for physical files", res['Message'])
        return res
    failed = res['Value']['Failed']
    if failed:
        for failedPfn, error in failed.items():
            gLogger.error('Failed to obtain registered PFN for physical file', '%s %s' % (failedPfn, error))
        return S_ERROR('Failed to obtain registered PFNs from physical file')
    registeredPFN = res['Value']['Successful'][pfn]
    res = Utils.executeSingleFileOrDirWrapper(self.fc.getLFNForPFN(registeredPFN))
    if not res['OK']:
        if re.search('No such file or directory', res['Message']):
            return S_OK(False)
        # Any other failure has no 'Value' key; hand the error back instead
        # of raising KeyError below.
        return res
    return S_OK(res['Value'])


def __updateReplicaToChecked(self, problematicDict):
    """Set the replica's catalog status to 'Checked' and mark the problematic resolved.

    ``problematicDict`` is modified in place ('Status' is set to 'Checked').

    :param problematicDict: integrity DB entry; 'LFN', 'FileID' and
                            'Prognosis' are read here
    :return: the result of ``__updateCompletedFiles``, or the catalog error
             after the retry counter has been incremented
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'
    res = Utils.executeSingleFileOrDirWrapper(self.fc.setReplicaStatus({lfn: problematicDict}))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    gLogger.info("%s replica (%d) is updated to Checked status" % (prognosis, fileID))
    return self.__updateCompletedFiles(prognosis, fileID)


def resolveCatalogPFNSizeMismatch(self, problematicDict):
    """Resolve the CatalogPFNSizeMismatch prognosis.

    The catalog, storage and bookkeeping sizes are compared:
      * all equal: the replica is set to 'Checked';
      * catalog == bookkeeping (storage differs): the replica is removed,
        unless it is the only one, in which case S_ERROR is returned;
      * bookkeeping == storage (catalog differs): the replica is set to
        'Checked' and the prognosis becomes 'BKCatalogSizeMismatch';
      * otherwise the retry counter is incremented.

    :param problematicDict: entry returned by the integrity DB ('LFN',
                            'PFN', 'SE' and 'FileID' are read)
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper(self.fc.getFileSize(lfn))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    catalogSize = res['Value']

    res = Utils.executeSingleFileOrDirWrapper(StorageElement(se).getFileSize(pfn))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    storageSize = res['Value']

    bkKCatalog = FileCatalog(['BookkeepingDB'])
    res = Utils.executeSingleFileOrDirWrapper(bkKCatalog.getFileSize(lfn))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    bookkeepingSize = res['Value']

    if bookkeepingSize == catalogSize == storageSize:
        gLogger.info("CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID)
        return self.__updateReplicaToChecked(problematicDict)

    if catalogSize == bookkeepingSize:
        # Catalog and bookkeeping agree, so the physical copy is the odd one out.
        gLogger.info("CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID)
        res = Utils.executeSingleFileOrDirWrapper(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(res['Value']) <= 1:
            gLogger.info("CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID)
            return S_ERROR("Not removing catalog file mismatch since the only replica")
        gLogger.info("CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID)
        res = self.dm.removeReplica(se, lfn)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('CatalogPFNSizeMismatch', fileID)

    # From here on catalogSize != bookkeepingSize is guaranteed.
    if bookkeepingSize == storageSize:
        gLogger.info("CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID)
        res = self.__updateReplicaToChecked(problematicDict)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.changeProblematicPrognosis(fileID, 'BKCatalogSizeMismatch')

    gLogger.info("CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID)
    return self.incrementProblematicRetry(fileID)


def resolvePFNNotRegistered(self, problematicDict):
    """Resolve the PFNNotRegistered prognosis.

    If the LFN is not in the catalog the physical file is removed. Otherwise
    the storage metadata decides: a missing file resolves the entry, a Lost
    file changes the prognosis to 'PFNLost', an Unavailable file increments
    the retry counter, and an accessible file is registered as a replica
    (followed by a catalog/storage size comparison).

    ``problematicDict`` is modified in place ('SE' and 'PFN' are rewritten
    before the replica is added).

    :param problematicDict: entry returned by the integrity DB ('LFN',
                            'PFN', 'SE' and 'FileID' are read)
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement(seName)
    res = Utils.executeSingleFileOrDirWrapper(self.fc.exists(lfn))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    if not res['Value']:
        # The file does not exist in the catalog
        res = Utils.executeSingleFileOrDirWrapper(se.removeFile(pfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    res = Utils.executeSingleFileOrDirWrapper(se.getFileMetadata(pfn))
    if not res['OK']:
        if re.search('File does not exist', res['Message']):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." % fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        return self.__returnProblematicError(fileID, res)
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
        gLogger.info("PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID)
        return self.changeProblematicPrognosis(fileID, 'PFNLost')
    if storageMetadata['Unavailable']:
        gLogger.info("PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID)
        return self.incrementProblematicRetry(fileID)

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split('_')[0].split('-')[0]
    if not storageMetadata['Cached']:
        if lfn.endswith('.raw'):
            seName = '%s-RAW' % site
        else:
            seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
        if lfn.startswith('/lhcb/data'):
            seName = '%s_M-DST' % site
        else:
            seName = '%s_MC_M-DST' % site
    else:
        if lfn.startswith('/lhcb/data'):
            seName = '%s-DST' % site
        else:
            seName = '%s_MC-DST' % site

    problematicDict['SE'] = seName
    # The StorageElement built from the original SE name is still used here.
    res = se.getPfnForProtocol(pfn, withPort=False)
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    failed = res['Value']['Failed']
    if failed:
        for failedPfn, error in failed.items():
            gLogger.error('Failed to obtain registered PFN for physical file', '%s %s' % (failedPfn, error))
        return S_ERROR('Failed to obtain registered PFNs from physical file')
    problematicDict['PFN'] = res['Value']['Successful'][pfn]

    res = Utils.executeSingleFileOrDirWrapper(self.fc.addReplica({lfn: problematicDict}))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    res = Utils.executeSingleFileOrDirWrapper(self.fc.getFileMetadata(lfn))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    if res['Value']['Size'] != storageMetadata['Size']:
        gLogger.info("PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID)
        return self.changeProblematicPrognosis(fileID, 'CatalogPFNSizeMismatch')
    return self.__updateCompletedFiles('PFNNotRegistered', fileID)


def resolveLFNCatalogMissing(self, problematicDict):
    """Resolve the LFNCatalogMissing prognosis.

    If ``self.fc.exists`` reports the LFN as present the entry is resolved;
    otherwise ``self.fc.removeFile`` is called before resolving it.

    :param problematicDict: entry returned by the integrity DB ('LFN' and
                            'FileID' are read)
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper(self.fc.exists(lfn))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    if res['Value']:
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = Utils.executeSingleFileOrDirWrapper(self.fc.removeFile(lfn))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    return self.__updateCompletedFiles('LFNCatalogMissing', fileID)


def resolvePFNMissing(self, problematicDict):
    """Resolve the PFNMissing prognosis.

    The entry is resolved when the LFN is gone from the catalog, when the
    physical file exists again (replica set to 'Checked'), or when no replica
    is registered at the site any more. Otherwise the replica is removed from
    the catalog; if it was the only replica the prognosis becomes
    'LFNZeroReplicas', else the file is re-replicated to the SE.

    ``problematicDict['SE']`` is overwritten with the matching catalog SE.

    :param problematicDict: entry returned by the integrity DB ('PFN',
                            'SE', 'LFN' and 'FileID' are read)
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper(self.fc.exists(lfn))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    if not res['Value']:
        gLogger.info("PFNMissing file (%d) no longer exists in catalog" % fileID)
        return self.__updateCompletedFiles('PFNMissing', fileID)

    res = Utils.executeSingleFileOrDirWrapper(StorageElement(se).exists(pfn))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    if res['Value']:
        gLogger.info("PFNMissing replica (%d) is no longer missing" % fileID)
        return self.__updateReplicaToChecked(problematicDict)
    gLogger.info("PFNMissing replica (%d) does not exist" % fileID)

    res = Utils.executeSingleFileOrDirWrapper(self.fc.getReplicas(lfn, allStatus=True))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    replicas = res['Value']
    # The site prefix of the SE name is used as a regular expression against
    # the registered SE names; the last match wins.
    seSite = se.split('_')[0].split('-')[0]
    found = False
    gLogger.debug("PFNMissing replica (%d) catalog replicas: %s" % (fileID, replicas))
    for replicaSE in replicas:
        if re.search(seSite, replicaSE):
            found = True
            problematicDict['SE'] = replicaSE
            se = replicaSE
    if not found:
        gLogger.info("PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID)
        return self.__updateCompletedFiles('PFNMissing', fileID)

    gLogger.info("PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID)
    res = Utils.executeSingleFileOrDirWrapper(self.fc.removeReplica({lfn: problematicDict}))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    if len(replicas) == 1:
        gLogger.info("PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID)
        return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
    res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles('PFNMissing', fileID)


def resolvePFNUnavailable(self, problematicDict):
    """Resolve the PFNUnavailable prognosis.

    A file that no longer exists moves to 'PFNMissing', a still-unavailable
    file (or any other metadata failure) increments the retry counter, a Lost
    file moves to 'PFNLost', and an accessible file is set to 'Checked'.

    :param problematicDict: entry returned by the integrity DB ('PFN', 'SE'
                            and 'FileID' are read)
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper(StorageElement(se).getFileMetadata(pfn))
    if (not res['OK']) and re.search('File does not exist', res['Message']):
        # The file is no longer Unavailable but has now disappeared completely
        gLogger.info("PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID)
        return self.changeProblematicPrognosis(fileID, 'PFNMissing')
    if (not res['OK']) or res['Value']['Unavailable']:
        gLogger.info("PFNUnavailable replica (%d) found to still be Unavailable" % fileID)
        return self.incrementProblematicRetry(fileID)
    if res['Value']['Lost']:
        gLogger.info("PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID)
        return self.changeProblematicPrognosis(fileID, 'PFNLost')
    gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" % fileID)
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked(problematicDict)


def resolvePFNZeroSize(self, problematicDict):
    """Resolve the PFNZeroSize prognosis.

    A missing file moves to 'PFNMissing'; a file that really has size 0 is
    removed from storage and moves to 'PFNMissing'; an unregistered file
    moves to 'PFNNotRegistered'; a catalog/storage size difference moves to
    'CatalogPFNSizeMismatch'; otherwise the entry is resolved.

    :param problematicDict: entry returned by the integrity DB ('PFN', 'SE'
                            and 'FileID' are read)
    """
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement(seName)
    res = Utils.executeSingleFileOrDirWrapper(se.getFileSize(pfn))
    if not res['OK']:
        if re.search('File does not exist', res['Message']):
            gLogger.info("PFNZeroSize replica (%d) found to be missing. Updating prognosis" % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        # Any other storage failure carries no 'Value'; retry later instead
        # of raising KeyError.
        return self.__returnProblematicError(fileID, res)
    storageSize = res['Value']
    if storageSize == 0:
        res = Utils.executeSingleFileOrDirWrapper(se.removeFile(pfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("PFNZeroSize replica (%d) removed. Updating prognosis" % fileID)
        return self.changeProblematicPrognosis(fileID, 'PFNMissing')

    res = self.__getRegisteredPFNLFN(pfn, seName)
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    lfn = res['Value']
    if not lfn:
        gLogger.info("PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % fileID)
        return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')

    res = Utils.executeSingleFileOrDirWrapper(self.fc.getFileMetadata(lfn))
    if not res['OK']:
        return self.__returnProblematicError(fileID, res)
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
        gLogger.info("PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % fileID)
        return self.changeProblematicPrognosis(fileID, 'CatalogPFNSizeMismatch')
    return self.__updateCompletedFiles('PFNZeroSize', fileID)


############################################################################################

def resolveLFNZeroReplicas(self, problematicDict):
    """Resolve the LFNZeroReplicas prognosis.

    If the catalog now lists replicas the entry is resolved. Otherwise every
    SE of the 'Tier1_MC_M-DST' group is probed for a physical copy; copies
    found are reported as 'PFNNotRegistered'. When no copy exists anywhere
    the file is removed from the catalog. The entry is then resolved.

    :param problematicDict: entry returned by the integrity DB ('LFN' and
                            'FileID' are read)
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    # NOTE(review): a failed catalog query is treated the same as "no
    # replicas" -- presumably because the single-file wrapper reports a
    # zero-replica file as an error; confirm.
    res = Utils.executeSingleFileOrDirWrapper(self.fc.getReplicas(lfn, allStatus=True))
    if res['OK'] and res['Value']:
        gLogger.info("LFNZeroReplicas file (%d) found to have replicas" % fileID)
    else:
        gLogger.info("LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID)
        pfnsFound = False
        for storageElementName in sortList(gConfig.getValue('Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
            res = self.__getStoragePathExists([lfn], storageElementName)
            if not res['OK']:
                # Never fall through to the catalog removal below when a
                # storage could not be checked.
                return self.__returnProblematicError(fileID, res)
            if lfn in res['Value']:
                gLogger.info("LFNZeroReplicas file (%d) found storage file at %s" % (fileID, storageElementName))
                pfn = res['Value'][lfn]
                self.__reportProblematicReplicas([(lfn, pfn, storageElementName, 'PFNNotRegistered')],
                                                 storageElementName, 'PFNNotRegistered')
                pfnsFound = True
        if not pfnsFound:
            gLogger.info("LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID)
            res = Utils.executeSingleFileOrDirWrapper(self.fc.removeFile(lfn))
            if not res['OK']:
                # Increment the number of retries for this file and log the error
                return self.__returnProblematicError(fileID, res)
            gLogger.info("LFNZeroReplicas file (%d) removed from catalog" % fileID)
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles('LFNZeroReplicas', fileID)
class CatalogPlugInTestCase(unittest.TestCase):
    """Common fixture for the catalog plug-in tests.

    Each test starts from a freshly created test directory holding
    ``numberOfFiles`` registered files; the directory is removed again on
    tear-down.
    """

    def setUp(self):
        # Metadata keys shared by directories and files.
        commonKeys = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.fullMetadata = commonKeys
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        self.assertTrue(self.catalog.isOK())

        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir

        # Start from a clean slate, then (re)create the working directory.
        self.cleanDirectory()
        created = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(created, self.destDir)

        # Register some files to work with.
        self.numberOfFiles = 2
        self.files = []
        for index in range(self.numberOfFiles):
            path = "%s/testFile_%d" % (self.destDir, index)
            self.assertTrue(self.registerFile(path))
            self.files.append(path)

    def registerFile(self, lfn):
        """Add ``lfn`` to the catalog with fixed dummy attributes and return its result entry."""
        guid = makeGuid()
        fileDict = {
            lfn: {
                "PFN": "protocol://host:port/storage/path%s" % lfn,
                "Size": 10000000,
                "SE": "DIRAC-storage",
                "GUID": guid,
                "Checksum": stringAdler(guid),
            }
        }
        return self.parseResult(self.catalog.addFile(fileDict), lfn)

    def parseResult(self, res, path):
        """Assert that ``res`` succeeded for ``path`` and return that path's value."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        successful = res["Value"]["Successful"]
        self.assertTrue(successful)
        self.assertTrue(path in successful)
        return successful[path]

    def parseError(self, res, path):
        """Assert that ``res`` failed for ``path`` and return that path's error."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        failed = res["Value"]["Failed"]
        self.assertTrue(failed)
        self.assertTrue(path in failed)
        return failed[path]

    def cleanDirectory(self):
        """Remove the test directory and the files listed in it, if it exists."""
        exists = self.parseResult(self.catalog.exists(self.destDir), self.destDir)
        if not exists:
            return
        listing = self.parseResult(self.catalog.listDirectory(self.destDir), self.destDir)
        lfns = list(listing["Files"])
        if lfns:
            self.purgeFiles(lfns)
        removed = self.parseResult(self.catalog.removeDirectory(self.destDir), self.destDir)
        self.assertTrue(removed)

    def purgeFiles(self, lfns):
        """Remove every replica of each LFN, then the catalog entry itself."""
        for lfn in lfns:
            replicas = self.parseResult(self.catalog.getReplicas(lfn, True), lfn)
            for se, pfn in replicas.items():
                outcome = self.catalog.removeReplica({lfn: {"PFN": pfn, "SE": se}})
                self.parseResult(outcome, lfn)
            self.parseResult(self.catalog.removeFile(lfn), lfn)

    def tearDown(self):
        self.cleanDirectory()
# Without a VO the user base directory cannot be built.
if not vo:
    gLogger.error('Could not determine VO')
    Script.showHelp()

baseDir = ('/%s/user/%s/%s' % (vo, username[0], username)).rstrip('/')
matching = (' matching %s' % wildcard) if wildcard else ''
gLogger.notice('Will search for files in %s%s' % (baseDir, matching))

# Walk of the directory tree; activeDirs is used as a stack of paths
# that still have to be listed.
activeDirs = [baseDir]
allFiles = []
emptyDirs = []
while activeDirs:
    currentDir = activeDirs.pop()
    res = fc.listDirectory(currentDir, withMetadata, timeout=360)
    if not res['OK']:
        gLogger.error("Error retrieving directory contents", "%s %s" % (currentDir, res['Message']))
        continue
    if currentDir in res['Value']['Failed']:
        gLogger.error("Error retrieving directory contents",
                      "%s %s" % (currentDir, res['Value']['Failed'][currentDir]))
        continue

    dirContents = res['Value']['Successful'][currentDir]
    subdirs = dirContents['SubDirs']
    files = dirContents['Files']
    if not subdirs and not files:
        emptyDirs.append(currentDir)
        gLogger.notice('%s: empty directory' % currentDir)
        continue

    # Pushed in reverse order so that pop() visits them in ascending order.
    # When metadata was requested, only sub-directories older than totalDays
    # are descended into.
    for subdir in sorted(subdirs, reverse=True):
        if (not withMetadata) or isOlderThan(subdirs[subdir]['CreationDate'], totalDays):
            activeDirs.append(subdir)
if not vo: gLogger.error('Could not determine VO') Script.showHelp() baseDir = '/%s/user/%s/%s' % (vo, username[0], username) baseDir = baseDir.rstrip('/') gLogger.info('Will search for files in %s' % baseDir) activeDirs = [baseDir] allFiles = [] allDirs = [] emptyDirs = [] while len(activeDirs) > 0: currentDir = activeDirs[0] res = fc.listDirectory(currentDir, verbose) activeDirs.remove(currentDir) if not res['OK']: gLogger.error("Error retrieving directory contents", "%s %s" % (currentDir, res['Message'])) elif res['Value']['Failed'].has_key(currentDir): gLogger.error( "Error retrieving directory contents", "%s %s" % (currentDir, res['Value']['Failed'][currentDir])) else: dirContents = res['Value']['Successful'][currentDir] subdirs = dirContents['SubDirs'] empty = True for subdir, metadata in subdirs.items(): if (not verbose) or isOlderThan(metadata['CreationDate'], totalDays):
class CatalogPlugInTestCase(unittest.TestCase):
    """Base class for the CatalogPlugin test case.

    ``setUp`` creates a clean test directory and registers
    ``numberOfFiles`` files in it; ``tearDown`` removes everything again.
    """

    def setUp(self):
        self.fullMetadata = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)

        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        # Only the assertions inside parseResult matter here.
        self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        """Add ``lfn`` to the catalog with fixed dummy attributes.

        :return: the 'Successful' entry for ``lfn`` (asserted to be present)
        """
        pfn = "protocol://host:port/storage/path%s" % lfn
        size = 10000000
        se = "DIRAC-storage"
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {lfn: {"PFN": pfn, "Size": size, "SE": se, "GUID": guid, "Checksum": adler}}
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        """Assert that ``res`` is OK and succeeded for ``path``; return that path's value."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Successful"])
        self.assertIn(path, res["Value"]["Successful"])
        return res["Value"]["Successful"][path]

    def parseError(self, res, path):
        """Assert that ``res`` is OK but failed for ``path``; return that path's error."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Failed"])
        self.assertIn(path, res["Value"]["Failed"])
        return res["Value"]["Failed"][path]

    def cleanDirectory(self):
        """Remove the test directory and the files listed in it, if it exists."""
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        # list() works for both a Python 2 list and a Python 3 keys view.
        toRemove = list(returnValue["Files"])
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        """Remove every replica of each LFN, then the file entry itself."""
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {lfn: {"PFN": pfn, "SE": se}}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()