Example #1
    def initialize(self):
        """Sets defaults
    """
        self.am_setOption('shifterProxy', 'ProductionManager')

        self.transClient = TransformationClient()
        self.reqClient = ReqClient()
        self.consChecks = ConsistencyChecks(interactive=False,
                                            transClient=self.transClient)

        transformationTypes = Operations().getValue(
            'Transformations/DataProcessing', [])
        extendableTTypes = Operations().getValue(
            'Transformations/ExtendableTransfTypes', ['MCSimulation'])
        self.transformationTypes = list(
            set(transformationTypes) - set(extendableTTypes))

        return S_OK()
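
As an aside, every call in these examples returns the DIRAC S_OK/S_ERROR dictionary. A minimal sketch (not from the source; the helper name is illustrative) of producing and consuming such results:

# Minimal sketch of the S_OK / S_ERROR return convention used throughout.
from DIRAC import S_OK, S_ERROR

def pickTypes(transformationTypes, extendableTTypes):
    """Toy helper mirroring initialize() above; the name is illustrative."""
    remaining = list(set(transformationTypes) - set(extendableTTypes))
    if not remaining:
        return S_ERROR('No transformation types left')  # {'OK': False, 'Message': ...}
    return S_OK(remaining)  # {'OK': True, 'Value': remaining}

res = pickTypes(['DataStripping', 'MCSimulation'], ['MCSimulation'])
if res['OK']:
    print(res['Value'])  # ['DataStripping']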
Example #2
  def setUp( self ):

    self.dmMock = Mock()
    self.dmMock.getReplicas.return_value = {'OK': True, 'Value':{'Successful':{'bb.raw':'metadataPippo'},
                                                                 'Failed':{}}}

    self.cc = ConsistencyChecks( transClient = Mock(), dm = self.dmMock, bkClient = bkc_mock )
    self.fileTypes = [['SEMILEPTONIC.DST', 'LOG', 'RAW'], ['SEMILEPTONIC.DST', 'LOG', 'RAW'], ['SEMILEPTONIC.DST'], ['SEMILEPTONIC.DST']]
    self.cc.fileTypesExcluded = ['LOG']
    self.cc.prod = 0
    self.maxDiff = None
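
A hypothetical companion test method (not in the source) showing how the canned Mock reply configured in setUp would be consumed:

  def test_getReplicasMocked(self):
    """Illustrative only: asserts against the canned dmMock reply from setUp."""
    res = self.dmMock.getReplicas(['bb.raw'])
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value']['Successful'], {'bb.raw': 'metadataPippo'})
    self.assertEqual(res['Value']['Failed'], {})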
Example #3
  Script.registerSwitch('', 'FixIt', '   Take action to fix the catalogs')
  Script.setUsageMessage('\n'.join([__doc__,
                                    'Usage:',
                                    '  %s [option|cfgfile] [values]' % Script.scriptName, ]))
  dmScript = DMScript()
  dmScript.registerDMSwitches()  # Directory
  Script.parseCommandLine(ignoreErrors=True)
  fixIt = False
  for opt, val in Script.getUnprocessedSwitches():
    if opt == 'FixIt':
      fixIt = True

  # imports
  from DIRAC import gLogger
  from LHCbDIRAC.DataManagementSystem.Client.ConsistencyChecks import ConsistencyChecks
  cc = ConsistencyChecks()
  cc.directories = dmScript.getOption('Directory', [])
  cc.lfns = dmScript.getOption('LFNs', []) + [lfn for arg in Script.getPositionalArgs() for lfn in arg.split(',')]
  bkQuery = dmScript.getBKQuery(visible='All')
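  # Attach the BK query only if the user supplied conditions beyond the default {'Visible': 'All'}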
  if bkQuery.getQueryDict() != {'Visible': 'All'}:
    bkQuery.setOption('ReplicaFlag', 'All')
    cc.bkQuery = bkQuery
  seList = dmScript.getOption('SEs', [])
  if not seList:
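    # No SEs specified: default to the Tier1 archive SEs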
    dmScript.setSEs('Tier1-Archive')
    seList = dmScript.getOption('SEs', [])

  from LHCbDIRAC.DataManagementSystem.Client.CheckExecutors import doCheckSE
  doCheckSE(cc, seList, fixIt)
Example #4
    def __init__(self):
        """ Extending DIRAC's DIRACDataIntegrityClient init
    """
        super(DataIntegrityClient, self).__init__()

        self.cc = ConsistencyChecks()
Example #5
class DataIntegrityClient(DIRACDataIntegrityClient):
    def __init__(self):
        """ Extending DIRAC's DIRACDataIntegrityClient init
    """
        super(DataIntegrityClient, self).__init__()

        self.cc = ConsistencyChecks()

    ##########################################################################
    #
    # This section contains the specific methods for BK->FC checks
    #

    def productionToCatalog(self, productionID):
        """  This obtains the file information from the BK and checks these files are present in the FC.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the BK->FC check")
        gLogger.info("-" * 40)
        res = self.__getProductionFiles(productionID)
        if not res['OK']:
            return res
        noReplicaFiles = res['Value']['GotReplicaNo']
        yesReplicaFiles = res['Value']['GotReplicaYes']
        # For the files marked as existing we perform a catalog check
        res = self.cc._getCatalogMetadata(yesReplicaFiles)
        if not res['OK']:
            return res
        catalogMetadata, missingCatalogFiles, zeroSizeFiles = res['Value']
        if missingCatalogFiles:
            self._reportProblematicFiles(missingCatalogFiles,
                                         'LFNCatalogMissing')
        if zeroSizeFiles:
            self._reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')

        # Try and get the metadata for files that shouldn't exist in the catalog
        if noReplicaFiles:
            res = self.__checkCatalogForBKNoReplicas(noReplicaFiles)
            if not res['OK']:
                return res
            catalogMetadata.update(res['Value'])
        # Get the replicas for the files found to exist in the catalog
        res = self.cc._getCatalogReplicas(catalogMetadata.keys())
        if not res['OK']:
            return res
        replicas, zeroReplicaFiles = res['Value']
        if zeroReplicaFiles:
            self._reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def __checkCatalogForBKNoReplicas(self, lfns):
        """ Checks the catalog existence for given files
    """
        gLogger.info('Checking the catalog existence of %s files' % len(lfns))

        res = self.fc.getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get catalog metadata', res['Message'])
            return res
        allMetadata = res['Value']['Successful']
        existingCatalogFiles = allMetadata.keys()
        if existingCatalogFiles:
            self._reportProblematicFiles(existingCatalogFiles, 'BKReplicaNo')
        gLogger.info('Checking the catalog existence of files complete')
        return S_OK(allMetadata)

    def __getProductionFiles(self, productionID):
        """ This method queries the bookkeeping and obtains the file metadata for the given production
    """
        from DIRAC.Core.DISET.RPCClient import RPCClient
        gLogger.info("Attempting to get files for production %s" %
                     productionID)
        bk = RPCClient('Bookkeeping/BookkeepingManager')
        res = bk.getProductionFiles(productionID, 'ALL')
        if not res['OK']:
            return res
        yesReplicaFiles = []
        noReplicaFiles = []
        badReplicaFiles = []
        badBKFileSize = []
        badBKGUID = []
        allMetadata = res['Value']
        gLogger.info("Obtained at total of %s files" % len(allMetadata.keys()))
        totalSize = 0
        for lfn, bkMetadata in allMetadata.iteritems():
            if bkMetadata['FileType'] != 'LOG':
                if bkMetadata['GotReplica'] == 'Yes':
                    yesReplicaFiles.append(lfn)
                    if bkMetadata['FileSize']:
                        totalSize += long(bkMetadata['FileSize'])
                elif bkMetadata['GotReplica'] == 'No':
                    noReplicaFiles.append(lfn)
                else:
                    badReplicaFiles.append(lfn)
                if not bkMetadata['FileSize']:
                    badBKFileSize.append(lfn)
                if not bkMetadata['GUID']:
                    badBKGUID.append(lfn)
        if badReplicaFiles:
            self._reportProblematicFiles(badReplicaFiles, 'BKReplicaBad')
        if badBKFileSize:
            self._reportProblematicFiles(badBKFileSize, 'BKSizeBad')
        if badBKGUID:
            self._reportProblematicFiles(badBKGUID, 'BKGUIDBad')
        gLogger.info("%s files marked with replicas with total size %s bytes" %
                     (len(yesReplicaFiles), totalSize))
        gLogger.info("%s files marked without replicas" % len(noReplicaFiles))
        resDict = {
            'BKMetadata': allMetadata,
            'GotReplicaYes': yesReplicaFiles,
            'GotReplicaNo': noReplicaFiles
        }
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the specific methods for FC->BK checks
    #

    def catalogDirectoryToBK(self, lfnDir):
        """ This obtains the replica and metadata information from the catalog for
      the supplied directory and checks against the BK.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->BK check")
        gLogger.info("-" * 40)
        if isinstance(lfnDir, basestring):
            lfnDir = [lfnDir]
        res = self.__getCatalogDirectoryContents(lfnDir)
        if not res['OK']:
            return res
        replicas = res['Value']['Replicas']
        catalogMetadata = res['Value']['Metadata']
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        if not catalogMetadata:
            gLogger.warn('No files found in directory %s' % lfnDir)
            return S_OK(resDict)
        lfns = list(replicas)  # the keys of the replica dictionary are the LFNs
        missingLFNs, noFlagLFNs, _okLFNs = self.cc._getBKMetadata(lfns)
        if missingLFNs:
            self._reportProblematicFiles(missingLFNs, 'LFNBKMissing')
        if noFlagLFNs:
            self._reportProblematicFiles(noFlagLFNs, 'BKReplicaNo')
        return S_OK(resDict)

    def catalogFileToBK(self, lfns):
        """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->BK check")
        gLogger.info("-" * 40)
        if isinstance(lfns, basestring):
            lfns = [lfns]

        res = self.cc._getCatalogMetadata(lfns)
        if not res['OK']:
            return res
        catalogMetadata, missingCatalogFiles, zeroSizeFiles = res['Value']
        if missingCatalogFiles:
            self._reportProblematicFiles(missingCatalogFiles,
                                         'LFNCatalogMissing')
        if zeroSizeFiles:
            self._reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')

        res = self.cc._getCatalogReplicas(catalogMetadata.keys())
        if not res['OK']:
            return res
        replicas, _zeroReplicaFiles = res['Value']

        lfns = list(replicas)  # the keys of the replica dictionary are the LFNs
        missingLFNs, noFlagLFNs, _okLFNs = self.cc._getBKMetadata(lfns)
        if missingLFNs:
            self._reportProblematicFiles(missingLFNs, 'LFNBKMissing')
        if noFlagLFNs:
            self._reportProblematicFiles(noFlagLFNs, 'BKReplicaNo')

        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def resolveBKReplicaYes(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the BKReplicaYes prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        removeBKFile = False
        # If the file does not exist in the catalog
        if not res['Value']:
            gLogger.info(
                "BKReplicaYes file (%d) does not exist in the catalog. Removing..."
                % fileID)
            removeBKFile = True
        else:
            gLogger.info(
                "BKReplicaYes file (%d) found to exist in the catalog" %
                fileID)
            # If the file has no replicas in the catalog
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if (not res['OK']) and (res['Message']
                                    == 'File has zero replicas'):
                gLogger.info(
                    "BKReplicaYes file (%d) found to exist without replicas. Removing..."
                    % fileID)
                removeBKFile = True
        if removeBKFile:
            # Remove the file from the BK because it does not exist
            res = returnSingleResult(
                FileCatalog(catalogs=['BookkeepingDB']).removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info("BKReplicaYes file (%d) removed from bookkeeping" %
                         fileID)
        return self.__updateCompletedFiles('BKReplicaYes', fileID)

    def resolveBKReplicaNo(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the BKReplicaNo prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If the file does not exist in the catalog, the prognosis is resolved
        if not res['Value']:
            return self.__updateCompletedFiles('BKReplicaNo', fileID)
        gLogger.info("BKReplicaNo file (%d) found to exist in the catalog" %
                     fileID)
        # Check whether the file has available replicas
        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("BKReplicaNo file (%d) found to have no replicas" %
                         fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        gLogger.info("BKReplicaNo file (%d) found to have replicas" % fileID)
        res = returnSingleResult(
            FileCatalog(catalogs=['BookkeepingDB']).addFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('BKReplicaNo', fileID)

    def checkPhysicalFiles(self,
                           replicas,
                           catalogMetadata,
                           ses=None,
                           fixIt=False):
        """ This takes the supplied replica and metadata information obtained
        from the catalog and checks it against the storage elements.
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the FC->SE check")
        gLogger.info("-" * 40)

        seLfns = {}
        for lfn, replicaDict in replicas.iteritems():
            for se in replicaDict:
                if ses and se not in ses:
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info('%s %s' %
                     ('Storage Element'.ljust(20), 'Replicas'.rjust(20)))

        for se in sorted(seLfns):
            lfns = seLfns[se]

            sizeMismatch = []
            checksumMismatch = []
            checksumBadInFC = []
            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res['OK']:
                gLogger.error('Failed to get physical file metadata.',
                              res['Message'])
                return res
            for lfn, metadata in res['Value'].iteritems():
                if lfn in catalogMetadata:
                    if (metadata['Size'] != catalogMetadata[lfn]['Size']) and (
                            metadata['Size'] != 0):
                        sizeMismatch.append((lfn, 'deprecatedUrl', se,
                                             'CatalogPFNSizeMismatch'))
                    if metadata['Checksum'] != catalogMetadata[lfn]['Checksum']:
                        if metadata['Checksum'].replace(
                                'x', '0'
                        ) == catalogMetadata[lfn]['Checksum'].replace(
                                'x', '0'):
                            checksumBadInFC.append(
                                (lfn, 'deprecatedUrl', se,
                                 "%s %s" % (metadata['Checksum'],
                                            catalogMetadata[lfn]['Checksum'])))
                        else:
                            checksumMismatch.append(
                                (lfn, 'deprecatedUrl', se,
                                 "%s %s" % (metadata['Checksum'],
                                            catalogMetadata[lfn]['Checksum'])))
            if sizeMismatch:
                self.reportProblematicReplicas(sizeMismatch,
                                               se,
                                               'CatalogPFNSizeMismatch',
                                               fixIt=fixIt)
            if checksumMismatch:
                self.reportProblematicReplicas(checksumMismatch,
                                               se,
                                               'CatalogChecksumMismatch',
                                               fixIt=fixIt)
            if checksumBadInFC:
                self.reportProblematicReplicas(checksumBadInFC,
                                               se,
                                               'CatalogChecksumToBeFixed',
                                               fixIt=fixIt)
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """ Check obtain the physical file metadata and check the files are available
    """
        gLogger.info('Checking the integrity of %s physical files at %s' %
                     (len(lfns), se))

        res = StorageElement(se).getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get metadata for lfns.', res['Message'])
            return res
        pfnMetadataDict = res['Value']['Successful']
        # If the replicas are completely missing
        missingReplicas = []
        for lfn, reason in res['Value']['Failed'].iteritems():
            if re.search('File does not exist', reason):
                missingReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNMissing'))
        if missingReplicas:
            self.reportProblematicReplicas(missingReplicas, se, 'PFNMissing')
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, metadata in pfnMetadataDict.iteritems():
            if metadata.get('Lost', False):
                lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost'))
            if metadata.get('Unavailable', not metadata['Accessible']):
                unavailableReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNUnavailable'))
            if metadata['Size'] == 0:
                zeroSizeReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNZeroSize'))
        if lostReplicas:
            self.reportProblematicReplicas(lostReplicas, se, 'PFNLost')
        if unavailableReplicas:
            self.reportProblematicReplicas(unavailableReplicas, se,
                                           'PFNUnavailable')
        if zeroSizeReplicas:
            self.reportProblematicReplicas(zeroSizeReplicas, se, 'PFNZeroSize')
        gLogger.info(
            'Checking the integrity of physical files at %s complete' % se)
        return S_OK(pfnMetadataDict)

    def reportProblematicReplicas(self, replicaTuple, se, reason, fixIt=False):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, pfn, se, reason1 in sorted(replicaTuple):
            if reason1 == reason:
                reason1 = ''
            if lfn:
                gLogger.info(lfn, reason1)
            else:
                gLogger.info(pfn, reason1)
        if fixIt:
            res = self.setReplicaProblematic(
                replicaTuple, sourceComponent='DataIntegrityClient')
            if not res['OK']:
                gLogger.info('Failed to update integrity DB with replicas',
                             res['Message'])
            else:
                gLogger.info('Successfully updated integrity DB with replicas')

    ##########################################################################
    #
    # This section contains the specific methods for obtaining replica and metadata information from the catalog
    #

    def __getCatalogDirectoryContents(self, lfnDir):
        """ Obtain the contents of the supplied directory
    """
        gLogger.info('Obtaining the catalog contents for %s directories' %
                     len(lfnDir))

        activeDirs = list(lfnDir)
        allFiles = {}
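        # Breadth-first walk: visit each directory and queue its subdirectories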
        while activeDirs:
            currentDir = activeDirs.pop(0)
            res = self.fc.listDirectory(currentDir, verbose=True)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])

        zeroReplicaFiles = []
        zeroSizeFiles = []
        allReplicaDict = {}
        allMetadataDict = {}
        for lfn, lfnDict in allFiles.iteritems():
            lfnReplicas = {}
            for se, replicaDict in lfnDict['Replicas'].iteritems():
                lfnReplicas[se] = replicaDict['PFN']
            if not lfnReplicas:
                zeroReplicaFiles.append(lfn)
            allReplicaDict[lfn] = lfnReplicas
            allMetadataDict[lfn] = lfnDict['MetaData']
            if lfnDict['MetaData']['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroReplicaFiles:
            self._reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        if zeroSizeFiles:
            self._reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info(
            'Obtained a total of %s files for the supplied directories' %
            len(allMetadataDict))
        resDict = {'Metadata': allMetadataDict, 'Replicas': allReplicaDict}
        return S_OK(resDict)
Example #6
def doCheckFC2BK(cc, fixFC=False, fixBK=False, listAffectedRuns=False):
    """
  Method actually calling for the the check using ConsistencyChecks module
  It prints out results and calls corrective actions if required
  """
    cc.checkFC2BK()

    maxFiles = 10
    suffix = ''
    nb = 0
    baseName = 'CheckFC2BK' + ('-%s' % cc.prod if cc.prod else '')
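    # Pick a report file name that does not overwrite output from a previous run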
    while True:
        fileName = baseName + '%s.txt' % suffix
        if not os.path.exists(fileName):
            break
        nb += 1
        suffix = '-%d' % nb
    fp = None
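    # The report file is opened lazily, only once there is something to record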
    if cc.existLFNsBKRepNo:
        gLogger.notice('>>>>')

        affectedRuns = list(
            set(str(run) for run in cc.existLFNsBKRepNo.itervalues()))
        gLogger.error("%d files are in the FC but have replica = NO in BK" %
                      len(cc.existLFNsBKRepNo))
        from LHCbDIRAC.DataManagementSystem.Client.ConsistencyChecks import ConsistencyChecks
        ccAux = ConsistencyChecks()
        gLogger.notice("====== Now checking %d files from FC to SE ======" %
                       len(cc.existLFNsBKRepNo))
        ccAux.lfns = cc.existLFNsBKRepNo.keys()
        doCheckFC2SE(ccAux, bkCheck=False, fixIt=fixFC, fixOption='FixFC')
        cc.existLFNsBKRepNo = sorted(
            set(cc.existLFNsBKRepNo) - set(ccAux.existLFNsNoSE) -
            set(ccAux.existLFNsNotExisting) - set(ccAux.existLFNsBadFiles))
        if cc.existLFNsBKRepNo:
            gLogger.notice(
                "====== Completed, %d files are in the FC and SE but have replica = NO in BK ======"
                % len(cc.existLFNsBKRepNo))
            if fp is None:
                fp = open(fileName, 'w')
            fp.write('\nInFCButBKNo '.join([''] + sorted(cc.existLFNsBKRepNo)))
            res = cc.bkClient.getFileMetadata(cc.existLFNsBKRepNo)
            if not res['OK']:
                gLogger.fatal("Unable to get file metadata", res['Message'])
                return
            if res['Value']['Failed']:
                gLogger.error("No metadata found for some files",
                              '%d files' % len(res['Value']['Failed']))
            success = res['Value']['Successful']
            filesInvisible = set(lfn for lfn, meta in success.iteritems()
                                 if meta['VisibilityFlag'][0].upper() == 'N')
            filesVisible = set(success) - filesInvisible
            gLogger.notice('%d files are visible, %d files are invisible' %
                           (len(filesVisible), len(filesInvisible)))
            # Try and print the whole as INFO (in case --Verbose was used).
            #   If nothing printed, print a limited number of files as ERROR
            if not gLogger.info('\n'.join(
                    '%s : Visi %s' %
                (lfn, success.get(lfn, {}).get('VisibilityFlag', '?'))
                    for lfn in sorted(cc.existLFNsBKRepNo))):
                if len(cc.existLFNsBKRepNo) > maxFiles:
                    gLogger.notice('First %d files:' % maxFiles)
                gLogger.error('\n'.join(
                    '%s : Visi %s' %
                    (lfn, success.get(lfn, {}).get('VisibilityFlag', '?'))
                    for lfn in sorted(cc.existLFNsBKRepNo)[0:maxFiles]))
            if listAffectedRuns:
                gLogger.notice('Affected runs: %s' % ','.join(affectedRuns))
            gLogger.notice("Full list of files:    grep InFCButBKNo %s" %
                           fileName)
            if fixBK:
                gLogger.notice("Going to fix them, setting the replica flag")
                res = cc.bkClient.addFiles(list(success))
                if res['OK']:
                    gLogger.notice(
                        "\tSuccessfully added replica flag to %d files" %
                        len(success))
                else:
                    gLogger.error('Failed to set the replica flag',
                                  res['Message'])
            elif fixFC:
                gLogger.notice(
                    "Going to fix them, by removing from the FC and storage")
                __removeFile(success)
            else:
                gLogger.notice(
                    "Use --FixBK to fix it (set the replica flag) or --FixFC (for removing from FC and storage)"
                )
        else:
            gLogger.notice(
                "====== Completed, no files in the FC with replica = NO in BK ======"
            )
        gLogger.notice('<<<<')

    else:
        gLogger.notice("No files in FC with replica = NO in BK -> OK!")

    if cc.existLFNsNotInBK:
        gLogger.notice('>>>>')

        gLogger.error("%d files are in the FC but are NOT in BK:" %
                      len(cc.existLFNsNotInBK))
        if fp is None:
            fp = open(fileName, 'w')
        fp.write('\nInFCNotInBK '.join([''] + sorted(cc.existLFNsNotInBK)))
        if not gLogger.info('\n'.join(sorted(cc.existLFNsNotInBK))):
            if len(cc.existLFNsNotInBK) > maxFiles:
                gLogger.notice('First %d files:' % maxFiles)
            gLogger.error('\n'.join(sorted(cc.existLFNsNotInBK)[0:maxFiles]))
        gLogger.notice("Full list of files:    grep InFCNotInBK %s" % fileName)
        if fixFC:
            gLogger.notice(
                "Going to fix them, by removing from the FC and storage")
            __removeFile(cc.existLFNsNotInBK)
        else:
            gLogger.notice(
                "Use --FixFC to fix it (remove from FC and storage)")
        gLogger.notice('<<<<')

    else:
        gLogger.notice("No files in FC not in BK -> OK!")
    if fp is not None:
        fp.close()
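
A minimal driver sketch (assumed, not from the source; the production number is illustrative):

from LHCbDIRAC.DataManagementSystem.Client.ConsistencyChecks import ConsistencyChecks

cc = ConsistencyChecks()
cc.prod = 12345  # hypothetical production; setting the prod also selects its transformation type
doCheckFC2BK(cc, fixFC=False, fixBK=False, listAffectedRuns=True)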
Example #7
class DataRecoveryAgent(AgentModule):
    """ Standard DIRAC agent class
  """
    def __init__(self, *args, **kwargs):
        """ c'tor
    """
        AgentModule.__init__(self, *args, **kwargs)

        self.transClient = None
        self.reqClient = None
        self.consChecks = None

        self.enableFlag = True
        self.transformationTypes = []
        self.transLogger = self.log

    #############################################################################

    def initialize(self):
        """Sets defaults
    """
        self.am_setOption('shifterProxy', 'ProductionManager')

        self.transClient = TransformationClient()
        self.reqClient = ReqClient()
        self.consChecks = ConsistencyChecks(interactive=False,
                                            transClient=self.transClient)

        transformationTypes = Operations().getValue(
            'Transformations/DataProcessing', [])
        extendableTTypes = Operations().getValue(
            'Transformations/ExtendableTransfTypes', ['MCSimulation'])
        self.transformationTypes = list(
            set(transformationTypes) - set(extendableTTypes))

        return S_OK()

    #############################################################################
    def execute(self):
        """ The main execution method.
    """
        # Configuration settings
        self.enableFlag = self.am_getOption('EnableFlag', True)
        self.log.verbose('Enable flag is %s' % self.enableFlag)
        if not self.transformationTypes:
            self.log.warn("No transformation types to look for... aborting")
            return S_OK()

        transformationStatus = self.am_getOption('TransformationStatus',
                                                 ['Active', 'Completing'])
        fileSelectionStatus = self.am_getOption('FileSelectionStatus',
                                                ['Assigned', 'MaxReset'])
        unrecoverableStatus = self.am_getOption('UnrecoverableStatus',
                                                ['MaxReset'])
        updateStatus = self.am_getOption('FileUpdateStatus', 'Unused')
        wmsStatusList = self.am_getOption('WMSStatus', ['Failed'])

        # Only consider jobs that have not been updated for the configured delay
        selectDelay = self.am_getOption('SelectionDelay', 1)  # hours

        transformationDict = {}
        for transStatus in transformationStatus:
            result = self.__getEligibleTransformations(
                transStatus, self.transformationTypes)
            if not result['OK']:
                self.log.error(
                    "Could not obtain eligible transformations",
                    "Status '%s': %s" % (transStatus, result['Message']))
                return result

            if not result['Value']:
                self.log.info(
                    'No "%s" transformations of types %s to process.' %
                    (transStatus, ', '.join(self.transformationTypes)))
                continue

            transformationDict.update(result['Value'])

        self.log.info(
            'Selected %d transformations of types %s' %
            (len(transformationDict), ', '.join(self.transformationTypes)))
        self.log.verbose('Transformations selected:\n%s' %
                         (', '.join(transformationDict)))

        for transformation, typeName in transformationDict.iteritems():
            self.transLogger = self.log.getSubLogger('Trans-%s' %
                                                     transformation)
            result = self.__selectTransformationFiles(transformation,
                                                      fileSelectionStatus)
            if not result['OK']:
                self.transLogger.error(
                    'Could not select files for transformation',
                    '%s: %s' % (transformation, result['Message']))
                continue
            fileDict = result['Value']
            if not fileDict:
                self.transLogger.verbose(
                    'No files in status %s selected for transformation %s' %
                    (', '.join(fileSelectionStatus), transformation))
                continue

            title = 'Looking at transformation %s, type %s ' % (transformation,
                                                                typeName)
            self.transLogger.info('=' * len(title))
            self.transLogger.info(title)

            self.transLogger.info(
                'Selected %d files with status %s' %
                (len(fileDict), ','.join(fileSelectionStatus)))
            result = self.__obtainWMSJobIDs(transformation, fileDict,
                                            selectDelay, wmsStatusList)
            if not result['OK']:
                self.transLogger.error(
                    "Could not obtain jobs for files of transformation",
                    result['Message'])
                continue
            jobFileDict = result['Value']
            if not jobFileDict:
                self.transLogger.info('No %s jobs found for selected files' %
                                      ' or '.join(wmsStatusList))
                continue

            self.transLogger.verbose(
                "Looking at WMS jobs %s" %
                ','.join(str(jobID) for jobID in jobFileDict))

            fileCount = sum(
                len(lfnList) for lfnList in jobFileDict.itervalues())
            self.transLogger.verbose(
                '%s files are selected after examining WMS jobs' %
                (str(fileCount) if fileCount else 'No'))
            if not fileCount:
                continue

            result = self.__removePendingRequestsJobs(jobFileDict)
            if not result['OK']:
                self.transLogger.error(
                    "Error while removing jobs with pending requests",
                    result['Message'])
                continue
            # Note: __removePendingRequestsJobs() removes jobs from jobFileDict in place
            if not jobFileDict:
                self.transLogger.info(
                    'No WMS jobs without pending requests to process.')
                continue

            fileCount = sum(
                len(lfnList) for lfnList in jobFileDict.itervalues())
            self.transLogger.info(
                '%s files are selected in %d jobs after removing any job with pending requests'
                % (str(fileCount) if fileCount else 'No', len(jobFileDict)))
            if not fileCount:
                continue

            jobsThatDidntProduceOutputs, jobsThatProducedOutputs = self.__checkDescendants(
                transformation, jobFileDict)
            title = '======== Transformation %s: results ========' % transformation
            self.transLogger.info(title)
            self.transLogger.info('\tTotal jobs that can be updated now: %d' %
                                  len(jobsThatDidntProduceOutputs))
            if jobsThatProducedOutputs:
                self.transLogger.info('\t%d jobs have descendants' %
                                      len(jobsThatProducedOutputs))
            else:
                self.transLogger.info('\tNo jobs have descendants')

            filesToUpdate = []
            filesMaxReset = []
            filesWithDescendants = []
            for job, fileList in jobFileDict.iteritems():
                if job in jobsThatDidntProduceOutputs:
                    recoverableFiles = set(
                        lfn for lfn in fileList
                        if fileDict[lfn][1] not in unrecoverableStatus)
                    filesToUpdate += list(recoverableFiles)
                    filesMaxReset += list(set(fileList) - recoverableFiles)
                elif job in jobsThatProducedOutputs:
                    filesWithDescendants += fileList

            if filesToUpdate:
                self.transLogger.info("\tUpdating %d files to '%s'" %
                                      (len(filesToUpdate), updateStatus))
                result = self.__updateFileStatus(transformation, filesToUpdate,
                                                 updateStatus)
                if not result['OK']:
                    self.transLogger.error(
                        '\tRecoverable files were not updated',
                        result['Message'])

            if filesMaxReset:
                self.transLogger.info(
                    '\t%d files are in %s status and have no descendants' %
                    (len(filesMaxReset), ','.join(unrecoverableStatus)))

            if filesWithDescendants:
                # FIXME: we should mark these files with another status such that they are not considered again and again
                # In addition a notification should be sent to the production managers
                self.transLogger.warn(
                    '\t!!!!!!!! Transformation has descendants for files that are not marked as processed !!!!!!!!'
                )
                self.transLogger.warn('\tFiles with descendants:',
                                      ','.join(filesWithDescendants))

        return S_OK()

    #############################################################################
    def __getEligibleTransformations(self, status, typeList):
        """ Select transformations of given status and type.
    """
        res = self.transClient.getTransformations(condDict={
            'Status': status,
            'Type': typeList
        })
        if not res['OK']:
            return res
        transformations = dict((str(prod['TransformationID']), prod['Type'])
                               for prod in res['Value'])
        return S_OK(transformations)

    #############################################################################
    def __selectTransformationFiles(self, transformation, statusList):
        """ Select files, production jobIDs in specified file status for a given transformation.
    """
        # Until a query for files with timestamp can be obtained must rely on the
        # WMS job last update
        res = self.transClient.getTransformationFiles(condDict={
            'TransformationID': transformation,
            'Status': statusList
        })
        if not res['OK']:
            return res
        resDict = {}
        mandatoryKeys = {'LFN', 'TaskID', 'LastUpdate'}
        for fileDict in res['Value']:
            missingKeys = mandatoryKeys - set(fileDict)
            if missingKeys:
                for key in missingKeys:
                    self.transLogger.warn(
                        '%s is mandatory, but missing for:\n\t%s' %
                        (key, str(fileDict)))
            else:
                resDict[fileDict['LFN']] = (fileDict['TaskID'],
                                            fileDict['Status'])
        return S_OK(resDict)

    #############################################################################
    def __obtainWMSJobIDs(self, transformation, fileDict, selectDelay,
                          wmsStatusList):
        """ Group files by the corresponding WMS jobIDs, check the corresponding
        jobs have not been updated for the delay time.  Can't get into any
        mess because we start from files only in MaxReset / Assigned and check
        corresponding jobs.  Mixtures of files for jobs in MaxReset and Assigned
        statuses only possibly include some files in Unused status (not Processed
        for example) that will not be touched.
    """
        taskIDList = sorted(
            set(taskID for taskID, _status in fileDict.values()))
        self.transLogger.verbose(
            "The following %d task IDs correspond to the selected files:\n%s" %
            (len(taskIDList), ', '.join(str(taskID) for taskID in taskIDList)))

        jobFileDict = {}
        olderThan = dateTime() - datetime.timedelta(hours=selectDelay)

        res = self.transClient.getTransformationTasks(
            condDict={
                'TransformationID': transformation,
                'TaskID': taskIDList
            },
            older=olderThan,
            timeStamp='LastUpdateTime')
        if not res['OK']:
            self.transLogger.error("getTransformationTasks returned an error",
                                   '%s' % res['Message'])
            return res

        mandatoryKeys = {
            'TaskID', 'ExternalID', 'LastUpdateTime', 'ExternalStatus'
        }
        for taskDict in res['Value']:
            missingKey = mandatoryKeys - set(taskDict)
            if missingKey:
                for key in missingKey:
                    self.transLogger.warn(
                        'Missing key %s for job dictionary:\n\t%s' %
                        (key, str(taskDict)))
                continue

            taskID = taskDict['TaskID']
            wmsID = taskDict['ExternalID']
            wmsStatus = taskDict['ExternalStatus']

            if not int(wmsID):
                self.transLogger.verbose(
                    'TaskID %s: status is %s (jobID = %s) so will not recheck with WMS'
                    % (taskID, wmsStatus, wmsID))
                continue

            # Exclude jobs not having appropriate WMS status - have to trust that production management status is correct
            if wmsStatus not in wmsStatusList:
                self.transLogger.verbose(
                    'Job %s is in status %s, not in %s so will be ignored' %
                    (wmsID, wmsStatus, ', '.join(wmsStatusList)))
                continue

            # Must map unique files -> jobs in expected state
            jobFileDict[wmsID] = [
                lfn for lfn, (tID, _st) in fileDict.iteritems()
                if int(tID) == int(taskID)
            ]

            self.transLogger.info(
                'Found %d files for taskID %s, jobID %s (%s), last update %s' %
                (len(jobFileDict[wmsID]), taskID, wmsID, wmsStatus,
                 taskDict['LastUpdateTime']))

        return S_OK(jobFileDict)

    #############################################################################

    def __removePendingRequestsJobs(self, jobFileDict):
        """ Before doing anything check that no outstanding requests are pending for the set of WMS jobIDs.
    """
        jobs = jobFileDict.keys()

        level = self.reqClient.log.getLevel()
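        # Temporarily silence ReqClient logging for the bulk query; the level is restored below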
        self.reqClient.log.setLevel('ERROR')
        result = self.reqClient.getRequestIDsForJobs(jobs)
        self.reqClient.log.setLevel(level)
        if not result['OK']:
            return result

        if not result['Value']['Successful']:
            self.transLogger.verbose('None of the jobs have pending requests')
            return S_OK()

        for jobID, requestID in result['Value']['Successful'].iteritems():
            res = self.reqClient.getRequestStatus(requestID)
            if not res['OK']:
                self.transLogger.error('Failed to get Status for Request',
                                       '%s:%s' % (requestID, res['Message']))
            elif res['Value'] != 'Done':
                # If we fail to get the Status or it is not Done, we must wait, so remove the job from the list.
                del jobFileDict[str(jobID)]
                self.transLogger.verbose(
                    'Removing jobID %s from consideration until requests are completed'
                    % (jobID))

        return S_OK()

    #############################################################################
    def __checkDescendants(self, transformation, jobFileDict):
        """ Check BK descendants for input files and prepare the list of actions
        to be taken for recovery.
        """

        jobsThatDidntProduceOutputs = []
        jobsThatProducedOutputs = []

        self.consChecks.prod = transformation
        for job, fileList in jobFileDict.iteritems():
            result = self.consChecks.getDescendants(fileList)
            filesWithDesc = result[0]
            filesWithMultipleDesc = result[2]
            if filesWithDesc or filesWithMultipleDesc:
                jobsThatProducedOutputs.append(job)
            else:
                jobsThatDidntProduceOutputs.append(job)

        return jobsThatDidntProduceOutputs, jobsThatProducedOutputs

    ############################################################################
    def __updateFileStatus(self, transformation, fileList, fileStatus):
        """ Update file list to specified status.
    """
        if not self.enableFlag:
            self.transLogger.info(
                "\tEnable flag is False, would have updated %d files to '%s' status for %s"
                % (len(fileList), fileStatus, transformation))
            return S_OK()

        return self.transClient.setFileStatusForTransformation(
            int(transformation), fileStatus, fileList, force=False)
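
For reference, a hedged summary (not from the source) of the options execute() reads via am_getOption, with the defaults coded above:

# Assumed summary of the agent's configurable options and their coded defaults.
defaults = {
    'EnableFlag': True,
    'TransformationStatus': ['Active', 'Completing'],
    'FileSelectionStatus': ['Assigned', 'MaxReset'],
    'UnrecoverableStatus': ['MaxReset'],
    'FileUpdateStatus': 'Unused',
    'WMSStatus': ['Failed'],
    'SelectionDelay': 1,  # hours since the job's last update
}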
Example #8
  # In case the user asked for specific LFNs
  if not status:
    lfnList = dmScript.getOption('LFNs', [])

  if not status and not lfnList and not runsList and not fromProd and not force:
    gLogger.fatal("You are about to check descendants for all files in a production")
    gLogger.fatal("If you really want to do so, use --Force")
    DIRAC.exit(0)

  from LHCbDIRAC.DataManagementSystem.Client.ConsistencyChecks import ConsistencyChecks
  from LHCbDIRAC.BookkeepingSystem.Client.BKQuery import BKQuery
  from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
  tr = TransformationClient()
  for prod in prodList:
    startTime = time.time()
    cc = ConsistencyChecks()
    # Setting the prod also sets its type
    try:
      cc.prod = prod
    except RuntimeError as e:
      gLogger.exception(lException=e)
      continue
    if fileType and cc.transType in ('Merge', 'MCMerge'):
      gLogger.notice("It is not allowed to select file type for merging transformation", prod)
      continue
    cc.verbose = verbose
    cc.noFC = noFC
    cc.descendantsDepth = depth
    if prod != prodList[0]:
      gLogger.notice("====================")
    gLogger.notice("Processing %s production %d" % (cc.transType, cc.prod))
Example #9
    # Switch registration reconstructed from the option handling below
    Script.registerSwitch('', 'FixIt', '   Take action to fix the catalogs')
    Script.registerSwitch('', 'CheckAllFlags',
                          '   Consider also files with replica flag NO')
    Script.parseCommandLine(ignoreErrors=True)

    fixIt = False
    checkAll = False
    production = 0
    for opt, val in Script.getUnprocessedSwitches():
        if opt == 'FixIt':
            fixIt = True
        elif opt == 'CheckAllFlags':
            checkAll = True

    # imports
    from LHCbDIRAC.DataManagementSystem.Client.ConsistencyChecks import ConsistencyChecks
    gLogger.setLevel('INFO')
    cc = ConsistencyChecks()
    bkQuery = dmScript.getBKQuery(visible='All')
    cc.bkQuery = bkQuery
    cc.lfns = dmScript.getOption('LFNs', [])
    productions = dmScript.getOption('Productions', [])

    from LHCbDIRAC.DataManagementSystem.Client.CheckExecutors import doCheckBK2FC
    if productions:
        for prod in productions:
            cc.prod = prod
            gLogger.always("Processing production %d" % cc.prod)
            doCheckBK2FC(cc, checkAll, fixIt)
            gLogger.always("Processed production %d" % cc.prod)
    else:
        doCheckBK2FC(cc, checkAll, fixIt)