Example #1
def lookup_replicas(files, protocol=['xroot', 'root']):
    from DIRAC.DataManagementSystem.Client.DataManager import DataManager
    from DIRAC.Resources.Storage.StorageElement import StorageElement
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient

    dm = DataManager()
    bk = BookkeepingClient()

    files_map = {f.lfn: f for f in files}

    res = dm.getReplicas([f.lfn for f in files], getUrl=False)
    replicas = res.get('Value', {}).get('Successful', {})
    seList = sorted(set(se for f in files for se in replicas.get(f.lfn, {})))
    # banned_SE_list = [se for se in seList if 'CNAF' in se]
    banned_SE_list = []
    print('Found SE list of', seList)

    # Check if files are MDF
    bkRes = bk.getFileTypeVersion([f.lfn for f in files])
    assert not set(lfn for lfn, fileType in bkRes.get('Value', {}).items()
                   if fileType == 'MDF')
    for se in seList:
        # TODO Check if SEs are available
        lfns = [f.lfn for f in files if se in replicas.get(f.lfn, [])]

        if se in banned_SE_list:
            print('Skipping banned SE', se)
            for lfn in lfns:
                files_map[lfn].replicas.append(Replica(lfn, se, banned=True))
            continue
        else:
            print('Looking up replicas for', len(lfns), 'files at', se)

        if lfns:
            res = StorageElement(se).getURL(lfns, protocol=protocol)
            if res['OK']:
                for lfn, pfn in res['Value']['Successful'].items():
                    files_map[lfn].replicas.append(Replica(lfn, se, pfn=pfn))
                for lfn in res['Value']['Failed']:
                    files_map[lfn].replicas.append(Replica(lfn, se, error=res))
            else:
                print('LFN -> PFN lookup failed for', se, 'with error:',
                      res['Message'])
                for lfn in lfns:
                    files_map[lfn].replicas.append(
                        Replica(lfn, se, error=res['Message']))
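Example #1 relies on a Replica holder class and on file objects exposing lfn and replicas attributes, neither of which is shown. A minimal sketch of what such holders might look like, with names and fields inferred from the constructor calls above (an assumption, not part of DIRAC):

class Replica:
    """Simple holder for one replica lookup result; fields match the calls in Example #1."""
    def __init__(self, lfn, se, pfn=None, error=None, banned=False):
        self.lfn = lfn
        self.se = se
        self.pfn = pfn
        self.error = error
        self.banned = banned


class InputFile:
    """Hypothetical input holder: lookup_replicas() only needs .lfn and a .replicas list."""
    def __init__(self, lfn):
        self.lfn = lfn
        self.replicas = []

# usage sketch: files = [InputFile(lfn) for lfn in lfns]; lookup_replicas(files)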
Example #2
def filterReplicas( opFile, logger = None, dataManager = None ):
  """ filter out banned/invalid source SEs """

  if logger is None:
    logger = gLogger
  if dataManager is None:
    dataManager = DataManager()

  log = logger.getSubLogger( "filterReplicas" )
  ret = { "Valid" : [], "NoMetadata" : [], "Bad" : [], 'NoReplicas':[], 'NoPFN':[] }

  replicas = dataManager.getActiveReplicas( opFile.LFN )
  if not replicas["OK"]:
    log.error( 'Failed to get active replicas', replicas["Message"] )
    return replicas
  reNotExists = re.compile( r".*such file.*" )
  replicas = replicas["Value"]
  failed = replicas["Failed"].get( opFile.LFN , "" )
  if reNotExists.match( failed.lower() ):
    opFile.Status = "Failed"
    opFile.Error = failed
    return S_ERROR( failed )

  replicas = replicas["Successful"].get( opFile.LFN, {} )
  noReplicas = False
  if not replicas:
    allReplicas = dataManager.getReplicas( opFile.LFN )
    if allReplicas['OK']:
      allReplicas = allReplicas['Value']['Successful'].get( opFile.LFN, {} )
      if not allReplicas:
        ret['NoReplicas'].append( None )
        noReplicas = True
      else:
        # We try inactive replicas to see if maybe the file doesn't exist at all
        replicas = allReplicas
      log.warn( "File has no%s replica in File Catalog" % ( '' if noReplicas else ' active' ), opFile.LFN )
    else:
      return allReplicas

  if not opFile.Checksum:
    # Set Checksum to FC checksum if not set in the request
    fcMetadata = FileCatalog().getFileMetadata( opFile.LFN )
    fcChecksum = fcMetadata.get( 'Value', {} ).get( 'Successful', {} ).get( opFile.LFN, {} ).get( 'Checksum' )
    # Replace opFile.Checksum if it doesn't match a valid FC checksum
    if fcChecksum:
      opFile.Checksum = fcChecksum
      opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get( 'ChecksumType', 'Adler32' )

  for repSEName in replicas:
    repSEMetadata = StorageElement( repSEName ).getFileMetadata( opFile.LFN )
    error = repSEMetadata.get( 'Message', repSEMetadata.get( 'Value', {} ).get( 'Failed', {} ).get( opFile.LFN ) )
    if error:
      log.warn( 'unable to get metadata at %s for %s' % ( repSEName, opFile.LFN ), error.replace( '\n', '' ) )
      if 'File does not exist' in error:
        ret['NoReplicas'].append( repSEName )
      else:
        ret["NoMetadata"].append( repSEName )
    elif not noReplicas:
      repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]

      seChecksum = repSEMetadata.get( "Checksum" )
      if not seChecksum and opFile.Checksum:
        opFile.Checksum = None
        opFile.ChecksumType = None
      elif seChecksum and not opFile.Checksum:
        opFile.Checksum = seChecksum
      if not opFile.Checksum or not seChecksum or compareAdler( seChecksum, opFile.Checksum ):
        # # All checksums are OK
        ret["Valid"].append( repSEName )
      else:
        log.warn( " %s checksum mismatch, FC: '%s' @%s: '%s'" % ( opFile.LFN,
                                                              opFile.Checksum,
                                                              repSEName,
                                                              seChecksum ) )
        ret["Bad"].append( repSEName )
    else:
      # If a replica was found somewhere, don't set the file as no replicas
      ret['NoReplicas'] = []

  return S_OK( ret )
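The result is wrapped in S_OK, so a caller is expected to unwrap it; a hypothetical consumer of filterReplicas() might look like the following sketch (illustrating the return structure, not taken from DIRAC):

res = filterReplicas(opFile)
if not res['OK']:
    gLogger.error("filterReplicas failed", res['Message'])
else:
    replicaStatus = res['Value']
    # SEs holding a replica whose checksum matched (or could not be compared)
    validSEs = replicaStatus["Valid"]
    if not validSEs:
        gLogger.warn("No usable replica found for", opFile.LFN)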
Example #3
def filterReplicas(opFile, logger=None, dataManager=None):
    """ filter out banned/invalid source SEs """

    if logger is None:
        logger = gLogger
    if dataManager is None:
        dataManager = DataManager()

    log = logger.getSubLogger("filterReplicas")
    result = defaultdict(list)

    replicas = dataManager.getActiveReplicas(opFile.LFN, getUrl=False)
    if not replicas["OK"]:
        log.error('Failed to get active replicas', replicas["Message"])
        return replicas
    reNotExists = re.compile(r".*such file.*")
    replicas = replicas["Value"]
    failed = replicas["Failed"].get(opFile.LFN, "")
    if reNotExists.match(failed.lower()):
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR(failed)

    replicas = replicas["Successful"].get(opFile.LFN, {})
    noReplicas = False
    if not replicas:
        allReplicas = dataManager.getReplicas(opFile.LFN, getUrl=False)
        if allReplicas['OK']:
            allReplicas = allReplicas['Value']['Successful'].get(
                opFile.LFN, {})
            if not allReplicas:
                result['NoReplicas'].append(None)
                noReplicas = True
            else:
                # There are replicas but we cannot get metadata because the replica is not active
                result['NoActiveReplicas'] += list(allReplicas)
            log.verbose(
                "File has no%s replica in File Catalog" %
                ('' if noReplicas else ' active'), opFile.LFN)
        else:
            return allReplicas

    if not opFile.Checksum or hexAdlerToInt(opFile.Checksum) is False:
        # Set Checksum to FC checksum if not set in the request
        fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
        fcChecksum = fcMetadata.get('Value',
                                    {}).get('Successful',
                                            {}).get(opFile.LFN,
                                                    {}).get('Checksum')
        # Replace opFile.Checksum if it doesn't match a valid FC checksum
        if fcChecksum:
            if hexAdlerToInt(fcChecksum) is not False:
                opFile.Checksum = fcChecksum
                opFile.ChecksumType = fcMetadata['Value']['Successful'][
                    opFile.LFN].get('ChecksumType', 'Adler32')
            else:
                opFile.Checksum = None

    # If no replica was found, return what we collected as information
    if not replicas:
        return S_OK(result)

    for repSEName in replicas:
        repSEMetadata = StorageElement(repSEName).getFileMetadata(opFile.LFN)
        error = repSEMetadata.get(
            'Message',
            repSEMetadata.get('Value', {}).get('Failed', {}).get(opFile.LFN))
        if error:
            log.warn(
                'unable to get metadata at %s for %s' %
                (repSEName, opFile.LFN), error.replace('\n', ''))
            if 'File does not exist' in error:
                result['NoReplicas'].append(repSEName)
            else:
                result["NoMetadata"].append(repSEName)
        elif not noReplicas:
            repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]

            seChecksum = hexAdlerToInt(repSEMetadata.get("Checksum"))
            # As from here seChecksum is an integer or False, not a hex string!
            if seChecksum is False and opFile.Checksum:
                result['NoMetadata'].append(repSEName)
            elif not seChecksum and opFile.Checksum:
                opFile.Checksum = None
                opFile.ChecksumType = None
            elif seChecksum and (not opFile.Checksum
                                 or opFile.Checksum == 'False'):
                # Use the SE checksum (convert to hex) and force type to be Adler32
                opFile.Checksum = intAdlerToHex(seChecksum)
                opFile.ChecksumType = 'Adler32'
            if not opFile.Checksum or not seChecksum or compareAdler(
                    intAdlerToHex(seChecksum), opFile.Checksum):
                # # All checksums are OK
                result["Valid"].append(repSEName)
            else:
                log.warn(" %s checksum mismatch, FC: '%s' @%s: '%s'" %
                         (opFile.LFN, opFile.Checksum, repSEName,
                          intAdlerToHex(seChecksum)))
                result["Bad"].append(repSEName)
        else:
            # If a replica was found somewhere, don't set the file as no replicas
            result['NoReplicas'] = []

    return S_OK(result)
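Examples #3 and #5 also use hexAdlerToInt() and intAdlerToHex(), which are not shown. Judging from the comments above (seChecksum becomes "an integer or False, not a hex string"), they plausibly behave like the following sketch; this is an assumption, not the actual DIRAC implementation:

def hexAdlerToInt(checksum):
    # convert a hex Adler32 string to an int, returning False if it cannot be parsed
    try:
        return int(checksum, 16)
    except (TypeError, ValueError):
        return False


def intAdlerToHex(value):
    # format an integer Adler32 value back as an 8-character hex string
    return '%08x' % value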
Example #4
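# Note: this example is a fragment from inside a loop over sites. It assumes that
# site, filesAtSite, jobs, sep and verbose, as well as dm (DataManager) and
# bk (BookkeepingClient) instances, are defined by the enclosing code.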
                gLogger.always("Couldn't find SEs for site %s" % site)
                continue
            seList = res['Value']
            inputData = sorted(filesAtSite[site])
            if verbose:
                gLogger.always("%sSite: %s, jobs: %s, %d files" %
                               (sep, site, ','.join(jobs), len(inputData)))
            else:
                gLogger.always("%sSite: %s, %d jobs, %d files" %
                               (sep, site, len(jobs), len(inputData)))
            sep = '=====================================\n'
            if verbose:
                gLogger.always('For %s, SEs: %s' % (site, str(seList)))
            pbFound = False

            res = dm.getReplicas(inputData)
            if not res['OK']:
                gLogger.always(
                    "Error getting replicas for %d files" % len(inputData),
                    res['Message'])
                continue
            replicas = res['Value']['Successful']
            notInFC = res['Value']['Failed']
            if notInFC:
                # Check if the files have a replica flag in the FC; if not, ignore the problem
                res = bk.getFileMetadata(notInFC.keys())
                if not res['OK']:
                    gLogger.always(
                        'Error getting BK metadata for %d files' %
                        len(notInFC), res['Message'])
                    continue
Example #5
def filterReplicas(opFile, logger=None, dataManager=None):
  """ filter out banned/invalid source SEs """

  if logger is None:
    logger = gLogger
  if dataManager is None:
    dataManager = DataManager()

  log = logger.getSubLogger("filterReplicas")
  result = defaultdict(list)

  replicas = dataManager.getActiveReplicas(opFile.LFN, getUrl=False)
  if not replicas["OK"]:
    log.error('Failed to get active replicas', replicas["Message"])
    return replicas
  reNotExists = re.compile(r".*such file.*")
  replicas = replicas["Value"]
  failed = replicas["Failed"].get(opFile.LFN, "")
  if reNotExists.match(failed.lower()):
    opFile.Status = "Failed"
    opFile.Error = failed
    return S_ERROR(failed)

  replicas = replicas["Successful"].get(opFile.LFN, {})
  noReplicas = False
  if not replicas:
    allReplicas = dataManager.getReplicas(opFile.LFN, getUrl=False)
    if allReplicas['OK']:
      allReplicas = allReplicas['Value']['Successful'].get(opFile.LFN, {})
      if not allReplicas:
        result['NoReplicas'].append(None)
        noReplicas = True
      else:
        # There are replicas but we cannot get metadata because the replica is not active
        result['NoActiveReplicas'] += list(allReplicas)
      log.verbose("File has no%s replica in File Catalog" % ('' if noReplicas else ' active'), opFile.LFN)
    else:
      return allReplicas

  if not opFile.Checksum or hexAdlerToInt(opFile.Checksum) is False:
    # Set Checksum to FC checksum if not set in the request
    fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
    fcChecksum = fcMetadata.get('Value', {}).get('Successful', {}).get(opFile.LFN, {}).get('Checksum')
    # Replace opFile.Checksum if it doesn't match a valid FC checksum
    if fcChecksum:
      if hexAdlerToInt(fcChecksum) is not False:
        opFile.Checksum = fcChecksum
        opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get('ChecksumType', 'Adler32')
      else:
        opFile.Checksum = None

  # If no replica was found, return what we collected as information
  if not replicas:
    return S_OK(result)

  for repSEName in replicas:
    repSEMetadata = StorageElement(repSEName).getFileMetadata(opFile.LFN)
    error = repSEMetadata.get('Message', repSEMetadata.get('Value', {}).get('Failed', {}).get(opFile.LFN))
    if error:
      log.warn('unable to get metadata at %s for %s' % (repSEName, opFile.LFN), error.replace('\n', ''))
      if 'File does not exist' in error:
        result['NoReplicas'].append(repSEName)
      else:
        result["NoMetadata"].append(repSEName)
    elif not noReplicas:
      repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]

      seChecksum = hexAdlerToInt(repSEMetadata.get("Checksum"))
      # As from here seChecksum is an integer or False, not a hex string!
      if seChecksum is False and opFile.Checksum:
        result['NoMetadata'].append(repSEName)
      elif not seChecksum and opFile.Checksum:
        opFile.Checksum = None
        opFile.ChecksumType = None
      elif seChecksum and (not opFile.Checksum or opFile.Checksum == 'False'):
        # Use the SE checksum (convert to hex) and force type to be Adler32
        opFile.Checksum = intAdlerToHex(seChecksum)
        opFile.ChecksumType = 'Adler32'
      if not opFile.Checksum or not seChecksum or compareAdler(
              intAdlerToHex(seChecksum), opFile.Checksum):
        # # All checksums are OK
        result["Valid"].append(repSEName)
      else:
        log.warn(" %s checksum mismatch, FC: '%s' @%s: '%s'" %
                 (opFile.LFN, opFile.Checksum, repSEName, intAdlerToHex(seChecksum)))
        result["Bad"].append(repSEName)
    else:
      # If a replica was found somewhere, don't set the file as no replicas
      result['NoReplicas'] = []

  return S_OK(result)
Example #6
def filterReplicas(opFile, logger=None, dataManager=None):
    """ filter out banned/invalid source SEs """

    if logger is None:
        logger = gLogger
    if dataManager is None:
        dataManager = DataManager()

    log = logger.getSubLogger("filterReplicas")
    ret = {"Valid": [], "NoMetadata": [], "Bad": [], "NoReplicas": [], "NoPFN": []}

    replicas = dataManager.getActiveReplicas(opFile.LFN)
    if not replicas["OK"]:
        log.error("Failed to get active replicas", replicas["Message"])
        return replicas
    reNotExists = re.compile(r".*such file.*")
    replicas = replicas["Value"]
    failed = replicas["Failed"].get(opFile.LFN, "")
    if reNotExists.match(failed.lower()):
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR(failed)

    replicas = replicas["Successful"].get(opFile.LFN, {})
    noReplicas = False
    if not replicas:
        allReplicas = dataManager.getReplicas(opFile.LFN)
        if allReplicas["OK"]:
            allReplicas = allReplicas["Value"]["Successful"].get(opFile.LFN, {})
            if not allReplicas:
                ret["NoReplicas"].append(None)
                noReplicas = True
            else:
                # We try inactive replicas to see if maybe the file doesn't exist at all
                replicas = allReplicas
            log.warn("File has no%s replica in File Catalog" % ("" if noReplicas else " active"), opFile.LFN)
        else:
            return allReplicas

    if not opFile.Checksum:
        # Set Checksum to FC checksum if not set in the request
        fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
        fcChecksum = fcMetadata.get("Value", {}).get("Successful", {}).get(opFile.LFN, {}).get("Checksum")
        # Replace opFile.Checksum if it doesn't match a valid FC checksum
        if fcChecksum:
            opFile.Checksum = fcChecksum
            opFile.ChecksumType = fcMetadata["Value"]["Successful"][opFile.LFN].get("ChecksumType", "Adler32")

    for repSEName in replicas:
        repSEMetadata = StorageElement(repSEName).getFileMetadata(opFile.LFN)
        error = repSEMetadata.get("Message", repSEMetadata.get("Value", {}).get("Failed", {}).get(opFile.LFN))
        if error:
            log.warn("unable to get metadata at %s for %s" % (repSEName, opFile.LFN), error.replace("\n", ""))
            if "File does not exist" in error:
                ret["NoReplicas"].append(repSEName)
            else:
                ret["NoMetadata"].append(repSEName)
        elif not noReplicas:
            repSEMetadata = repSEMetadata["Value"]["Successful"][opFile.LFN]

            seChecksum = repSEMetadata.get("Checksum")
            if not seChecksum and opFile.Checksum:
                opFile.Checksum = None
                opFile.ChecksumType = None
            elif seChecksum and not opFile.Checksum:
                opFile.Checksum = seChecksum
            if not opFile.Checksum or not seChecksum or compareAdler(seChecksum, opFile.Checksum):
                # # All checksums are OK
                ret["Valid"].append(repSEName)
            else:
                log.warn(
                    " %s checksum mismatch, FC: '%s' @%s: '%s'" % (opFile.LFN, opFile.Checksum, repSEName, seChecksum)
                )
                ret["Bad"].append(repSEName)
        else:
            # If a replica was found somewhere, don't set the file as no replicas
            ret["NoReplicas"] = []

    return S_OK(ret)
Example #7
def filterReplicas( opFile, logger = None, dataManager = None ):
  """ filter out banned/invalid source SEs """

  if logger is None:
    logger = gLogger
  if dataManager is None:
    dataManager = DataManager()

  log = logger.getSubLogger( "filterReplicas" )
  ret = { "Valid" : [], "NoMetadata" : [], "Bad" : [], 'NoReplicas':[], 'NoPFN':[] }

  replicas = dataManager.getActiveReplicas( opFile.LFN )
  if not replicas["OK"]:
    log.error( 'Failed to get active replicas', replicas["Message"] )
    return replicas
  reNotExists = re.compile( r".*such file.*" )
  replicas = replicas["Value"]
  failed = replicas["Failed"].get( opFile.LFN , "" )
  if reNotExists.match( failed.lower() ):
    opFile.Status = "Failed"
    opFile.Error = failed
    return S_ERROR( failed )

  replicas = replicas["Successful"].get( opFile.LFN, {} )
  noReplicas = False
  if not replicas:
    allReplicas = dataManager.getReplicas( opFile.LFN )
    if allReplicas['OK']:
      allReplicas = allReplicas['Value']['Successful'].get( opFile.LFN, {} )
      if not allReplicas:
        ret['NoReplicas'].append( None )
        noReplicas = True
      else:
        # We try inactive replicas to see if maybe the file doesn't exist at all
        replicas = allReplicas
      log.warn( "File has no%s replica in File Catalog" % ( '' if noReplicas else ' active' ), opFile.LFN )
    else:
      return allReplicas

  if not opFile.Checksum or hexAdlerToInt( opFile.Checksum ) is False:
    # Set Checksum to FC checksum if not set in the request
    fcMetadata = FileCatalog().getFileMetadata( opFile.LFN )
    fcChecksum = fcMetadata.get( 'Value', {} ).get( 'Successful', {} ).get( opFile.LFN, {} ).get( 'Checksum' )
    # Replace opFile.Checksum if it doesn't match a valid FC checksum
    if fcChecksum:
      if hexAdlerToInt( fcChecksum ) is not False:
        opFile.Checksum = fcChecksum
        opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get( 'ChecksumType', 'Adler32' )
      else:
        opFile.Checksum = None

  for repSEName in replicas:
    repSEMetadata = StorageElement( repSEName ).getFileMetadata( opFile.LFN )
    error = repSEMetadata.get( 'Message', repSEMetadata.get( 'Value', {} ).get( 'Failed', {} ).get( opFile.LFN ) )
    if error:
      log.warn( 'unable to get metadata at %s for %s' % ( repSEName, opFile.LFN ), error.replace( '\n', '' ) )
      if 'File does not exist' in error:
        ret['NoReplicas'].append( repSEName )
      else:
        ret["NoMetadata"].append( repSEName )
    elif not noReplicas:
      repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]

      seChecksum = hexAdlerToInt( repSEMetadata.get( "Checksum" ) )
      if seChecksum is False and opFile.Checksum:
        ret['NoMetadata'].append( repSEName )
      elif not seChecksum and opFile.Checksum:
        opFile.Checksum = None
        opFile.ChecksumType = None
      elif seChecksum and ( not opFile.Checksum or opFile.Checksum == 'False' ):
        # Use the SE checksum and force type to be Adler32
        opFile.Checksum = seChecksum
        opFile.ChecksumType = 'Adler32'
      if not opFile.Checksum or not seChecksum or compareAdler( seChecksum, opFile.Checksum ):
        # # All checksums are OK
        ret["Valid"].append( repSEName )
      else:
        log.warn( " %s checksum mismatch, FC: '%s' @%s: '%s'" % ( opFile.LFN,
                                                              opFile.Checksum,
                                                              repSEName,
                                                              seChecksum ) )
        ret["Bad"].append( repSEName )
    else:
      # If a replica was found somewhere, don't set the file as no replicas
      ret['NoReplicas'] = []

  return S_OK( ret )
Example #8
class fakeClient:
    def __init__(self, trans, transID, lfns, asIfProd):
        self.trans = trans
        self.transID = transID
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        self.transClient = TransformationClient()
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
        self.bk = BookkeepingClient()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()
        self.asIfProd = asIfProd

        (self.transFiles, self.transReplicas) = self.prepareForPlugin(lfns)

    def addFilesToTransformation(self, transID, lfns):
        return S_OK({
            'Failed': {},
            'Successful': dict([(lfn, 'Added') for lfn in lfns])
        })

    def getTransformation(self, transID, extraParams=False):
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformation(transID)
        res = self.trans.getType()
        return DIRAC.S_OK({'Type': res['Value']})

    def getReplicas(self):
        return self.transReplicas

    def getFiles(self):
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = [
            'CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW', 'SARA-RAW',
            'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW'
        ]
        counters = []
        for se in possibleTargets:
            counters.append(({'UsedSE': se}, 0))
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        if transID == self.transID and self.asIfProd:
            return self.transClient.getBookkeepingQuery(self.asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] == self.transID:
            transRuns = []
            runs = condDict.get('RunNumber', [])
            if not runs and self.transFiles:
                res = self.bk.getFileMetadata(
                    [fileDict['LFN'] for fileDict in self.transFiles])
                if not res['OK']:
                    return res
                runs = list(
                    set(meta['RunNumber']
                        for meta in res['Value']['Successful'].itervalues()))
            for run in runs:
                transRuns.append({
                    'RunNumber': run,
                    'Status': "Active",
                    "SelectedSite": None
                })
            return DIRAC.S_OK(transRuns)
        else:
            return self.transClient.getTransformationRuns(condDict)

    def getTransformationFiles(self, condDict=None):
        if condDict.get('TransformationID') == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict.get('TransformationID') == self.transID:
            transFiles = []
            if 'Status' in condDict and 'Unused' not in condDict['Status']:
                return DIRAC.S_OK(transFiles)
            runs = None
            if 'RunNumber' in condDict:
                runs = condDict['RunNumber']
                if not isinstance(runs, list):
                    runs = [runs]
            for fileDict in self.transFiles:
                if not runs or fileDict['RunNumber'] in runs:
                    transFiles.append({
                        'LFN': fileDict['LFN'],
                        'Status': 'Unused',
                        'RunNumber': fileDict['RunNumber']
                    })
            return DIRAC.S_OK(transFiles)
        else:
            return self.transClient.getTransformationFiles(condDict=condDict)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """
    Update the transFiles with some parameters
    """
        if transID == self.transID:
            for fileDict in self.transFiles:
                fileDict.update(lfnDict.get(fileDict['LFN'], {}))
            return S_OK()
        else:
            return self.transClient.setParameterToTransformationFiles(
                transID, lfnDict)

    def getTransformationFilesCount(self, transID, field, selection=None):
        if selection is None:
            selection = {}
        if transID == self.transID or selection.get(
                'TransformationID') == self.transID:
            runs = selection.get('RunNumber')
            if runs and not isinstance(runs, list):
                runs = [runs]
            if field == 'Status':
                counters = {'Unused': 0}
                for fileDict in self.transFiles:
                    if not runs or fileDict['RunNumber'] in runs:
                        counters['Unused'] += 1
            elif field == 'RunNumber':
                counters = {}
                for fileDict in self.transFiles:
                    runID = fileDict['RunNumber']
                    if not runs or runID in runs:
                        counters.setdefault(runID, 0)
                        counters[runID] += 1
            else:
                return DIRAC.S_ERROR('Not implemented for field ' + field)
            counters['Total'] = sum(count for count in counters.itervalues())
            return DIRAC.S_OK(counters)
        else:
            return self.transClient.getTransformationFilesCount(
                transID, field, selection=selection)

    def getTransformationRunStats(self, transIDs):
        counters = {}
        for transID in transIDs:
            if transID == self.transID:
                for fileDict in self.transFiles:
                    runID = fileDict['RunNumber']
                    counters[transID][runID]['Unused'] = counters.setdefault(
                        transID, {}).setdefault(runID, {}).setdefault(
                            'Unused', 0) + 1
                for runID in counters[transID]:
                    counters[transID][runID]['Total'] = counters[transID][
                        runID]['Unused']
            else:
                res = self.transClient.getTransformationRunStats(transIDs)
                if res['OK']:
                    counters.update(res['Value'])
                else:
                    return res
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        import time
        print "Preparing the plugin input data (%d files)" % len(lfns)
        type = self.trans.getType()['Value']
        if not lfns:
            return (None, None)
        res = self.bk.getFileMetadata(lfns)
        if res['OK']:
            files = []
            for lfn, metadata in res['Value']['Successful'].iteritems():
                runID = metadata.get('RunNumber', 0)
                runDict = {"RunNumber": runID, "LFN": lfn}
                files.append(runDict)
        else:
            print "Error getting BK metadata", res['Message']
            return ([], {})
        replicas = {}
        startTime = time.time()
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        for lfnChunk in breakListIntoChunks(lfns, 200):
            # print lfnChunk
            if type.lower() in ("replication", "removal"):
                res = self.dm.getReplicas(lfnChunk, getUrl=False)
            else:
                res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False)
            # print res
            if res['OK']:
                for lfn, ses in res['Value']['Successful'].iteritems():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                print "Error getting replicas of %d files:" % len(
                    lfns), res['Message']
        print "Obtained replicas of %d files in %.3f seconds" % (
            len(lfns), time.time() - startTime)
        return (files, replicas)
def execute():
    """
  Parse the options and execute the script
  """
    bkQuery = dmScript.getBKQuery()
    fileType = bkQuery.getFileTypeList()
    if not set(fileType) & {'FULL.DST', 'RDST', 'SDST'}:
        gLogger.error("Please provide a reconstruction BK path")
        DIRAC.exit(1)

    from LHCbDIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    from DIRAC.DataManagementSystem.Client.DataManager import DataManager
    from DIRAC.Core.Utilities.List import breakListIntoChunks
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    from DIRAC.DataManagementSystem.Utilities.DMSHelpers import DMSHelpers, resolveSEGroup

    bk = BookkeepingClient()
    tr = TransformationClient()
    dm = DataManager()
    dmsHelper = DMSHelpers()

    bkQueryDict = bkQuery.getQueryDict()
    gLogger.notice("For BK Query:", str(bkQueryDict))
    progressBar = ProgressBar(1, title="Running BK query...", step=1)
    res = bk.getFilesWithMetadata(bkQueryDict)
    if not res['OK']:
        gLogger.error("Error getting files from BK", res['Message'])
        DIRAC.exit(2)

    if 'ParameterNames' in res.get('Value', {}):
        parameterNames = res['Value']['ParameterNames']
        info = res['Value']['Records']
        progressBar.endLoop("Obtained %d files" % len(info))
    else:
        gLogger.error('\nNo metadata found')
        DIRAC.exit(3)
    lfns = []
    runLFNs = {}
    for item in info:
        metadata = dict(zip(parameterNames, item))
        lfn = metadata['FileName']
        lfns.append(lfn)
        runLFNs.setdefault(metadata['RunNumber'], []).append(lfn)

    chunkSize = 1000
    progressBar = ProgressBar(len(lfns),
                              title='Getting replicas of %d files' % len(lfns),
                              chunk=chunkSize)
    replicas = {}
    errors = {}
    for lfnChunk in breakListIntoChunks(lfns, chunkSize):
        progressBar.loop()
        res = dm.getReplicas(lfnChunk, getUrl=False)
        if not res['OK']:
            errors.setdefault(res['Message'], []).extend(lfnChunk)
        else:
            replicas.update(res['Value']['Successful'])
            for lfn, error in res['Value']['Failed'].iteritems():
                errors.setdefault(error, []).append(lfn)
    progressBar.endLoop()
    for error, lfns in errors.iteritems():
        gLogger.error(error, 'for %d files' % len(lfns))

    tier1RDST = set(resolveSEGroup('Tier1-RDST'))
    setOK = 0
    errors = {}
    progressBar = ProgressBar(len(runLFNs),
                              title='Defining destination for %d runs' %
                              len(runLFNs),
                              step=10)
    for run, lfns in runLFNs.iteritems():
        progressBar.loop()
        res = tr.getDestinationForRun(run)
        if res.get('Value'):
            errors.setdefault('Destination already set', []).append(str(run))
            continue
        # print 'Run', run, len( lfns ), 'Files', lfns[:3]
        seCounts = {}
        for lfn in lfns:
            for se in tier1RDST.intersection(replicas.get(lfn, [])):
                seCounts[se] = seCounts.setdefault(se, 0) + 1
        # print seCounts
        maxi = 0
        seMax = None
        for se, count in seCounts.iteritems():
            if count > maxi:
                seMax = se
                maxi = count
        if not seMax:
            errors.setdefault('No SE found, use CERN-RDST',
                              []).append(str(run))
            seMax = 'CERN-RDST'
        # SE found, get its site
        res = dmsHelper.getLocalSiteForSE(seMax)
        if res['OK']:
            site = res['Value']
            res = tr.setDestinationForRun(run, site)
            if not res['OK']:
                errors.setdefault(res['Message'], []).append(str(run))
            else:
                setOK += 1
    progressBar.endLoop('Successfully set destination for %d runs' % setOK)
    for error, runs in errors.iteritems():
        gLogger.error(error, 'for runs %s' % ','.join(runs))