Exemplo n.º 1
0
class PluginUtilities(object):
  """
  Utility class used by plugins
  """

  def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
               debug=False, transInThread=None, transID=None):
    """
    Build the helper, creating a default client for each one that was not supplied.

    :param str plugin: plugin name, also used as the name of the sub-logger
    :param transClient: transformation client, a TransformationClient() is created when None
    :param dataManager: data manager, a DataManager() is created when None
    :param fc: file catalog, a FileCatalog() is created when None
    :param bool debug: when True, logVerbose() messages are emitted at info level
    :param dict transInThread: transformation ID -> log prefix, an empty dict when None
    :param transID: transformation ID, overwritten by setParameters()
    """
    # Clients: keep what the caller gave, build a default otherwise
    self.transClient = TransformationClient() if transClient is None else transClient
    self.dm = DataManager() if dataManager is None else dataManager
    self.fc = FileCatalog() if fc is None else fc
    self.dmsHelper = DMSHelpers()

    # Identification of the plugin and of the transformation, used for logging
    self.plugin = plugin
    self.transID = transID
    self.transInThread = {} if transInThread is None else transInThread
    self.transString = ''
    self.debug = debug

    # Transformation parameters and values derived from them on first use
    self.params = {}
    self.groupSize = 0
    self.maxFiles = 0
    self.cachedLFNSize = {}

    self.log = gLogger.getSubLogger(plugin)

  def logVerbose(self, message, param=''):
    """ Log at verbose level, or at info level with a '(V)' marker when debug is set """
    if not self.debug:
      self.log.verbose(self.transString + message, param)
      return
    self.log.info('(V)' + self.transString + message, param)

  def logDebug(self, message, param=''):
    """ Log at debug level, the message being prefixed with the transformation string """
    text = self.transString + message
    self.log.debug(text, param)

  def logInfo(self, message, param=''):
    """ Log at info level, the message being prefixed with the transformation string """
    text = self.transString + message
    self.log.info(text, param)

  def logWarn(self, message, param=''):
    """ Log at warning level, the message being prefixed with the transformation string """
    text = self.transString + message
    self.log.warn(text, param)

  def logError(self, message, param=''):
    """ Log at error level, the message being prefixed with the transformation string """
    text = self.transString + message
    self.log.error(text, param)

  def logException(self, message, param='', lException=False):
    """ Forward to the logger's exception(), prefixing the message with the transformation string """
    text = self.transString + message
    self.log.exception(text, param, lException)

  def setParameters(self, params):
    """ Set the transformation parameters and extract transID """
    self.params = params
    self.transID = params['TransformationID']
    self.transString = self.transInThread.get(self.transID, ' [NoThread] [%d] ' % self.transID)

  # @timeThis
  def groupByReplicas(self, files, status):
    """
    Generate tasks based on the location of the input data.

    The files are grouped twice with getFileGroups(): first with groupSE=True, then, for
    the files that are still left, with groupSE=False. Each group is cut in chunks of
    ``groupSize`` files; an incomplete chunk only becomes a task when flushing.

    :param files: file replicas, anything accepted by dict(), e.g.
              {'/this/is/at.1': ['SE1'],
               '/this/is/at.12': ['SE1', 'SE2'],
               '/this/is/at.2': ['SE2'],
               '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
               '/this/is/at_23': ['SE2', 'SE3'],
               '/this/is/at_4': ['SE4']}
    :param str status: 'Flush' forces incomplete chunks into tasks
    :returns: S_OK with a list of (SE, [lfns]) tuples
    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    # Work on a copy, files are removed from it as they get assigned to tasks
    files = dict(files)

    # Parameters
    if not self.groupSize:
      self.groupSize = self.getPluginParam('GroupSize', 10)
    flush = (status == 'Flush')
    self.logVerbose(
        "groupByReplicas: %d files, groupSize %d, flush %s" %
        (len(files), self.groupSize, flush))

    # Consider files by groups of SEs, a file is only in one group
    # Then consider files site by site, but a file can now be at more than one site
    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)
      self.logDebug("fileGroups set: ", seFiles)

      for replicaSE in sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        if not lfns:
          continue
        lfnsInTasks = []
        for taskLfns in breakListIntoChunks(lfns, self.groupSize):
          if flush or (len(taskLfns) >= self.groupSize):
            tasks.append((replicaSE, taskLfns))
            lfnsInTasks += taskLfns
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop(lfn)
        if not groupSE:
          # In case the file was at more than one site, remove it from the other sites' list.
          # A set keeps the membership test cheap for large groups.
          usedLfns = set(lfnsInTasks)
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
      self.logVerbose(
          "groupByReplicas: %d tasks created (groupSE %s)" %
          (len(tasks) - nTasks, str(groupSE)), "%d files not included in tasks" %
          len(files))
      nTasks = len(tasks)

    return S_OK(tasks)

  def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
    """
    Split files in groups according to the size and create tasks for a given SE
    """
    tasks = []
    if fileSizes is None:
      fileSizes = self._getFileSize(lfns).get('Value')
    if fileSizes is None:
      self.logWarn('Error getting file sizes, no tasks created')
      return tasks
    taskLfns = []
    taskSize = 0
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
    if not self.maxFiles:
      # FIXME: prepare for chaging the name of the ambiguoug  CS option
      self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
    lfns = sorted(lfns, key=fileSizes.get)
    for lfn in lfns:
      size = fileSizes.get(lfn, 0)
      if size:
        if size > self.groupSize:
          tasks.append((replicaSE, [lfn]))
        else:
          taskSize += size
          taskLfns.append(lfn)
          if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
            tasks.append((replicaSE, taskLfns))
            taskLfns = []
            taskSize = 0
    if flush and taskLfns:
      tasks.append((replicaSE, taskLfns))
    if not tasks and not flush and taskLfns:
      self.logVerbose(
          'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
          (taskSize, self.groupSize))
    return tasks

  # @timeThis
  def groupBySize(self, files, status):
    """
    Generate tasks holding a given amount of data.

    The files are grouped twice with getFileGroups(), first with groupSE=True and then,
    for the files still left, with groupSE=False. Each group is handed to
    createTasksBySize().

    :param files: file replicas, anything accepted by dict()
    :param str status: 'Flush' forces incomplete groups into tasks
    :returns: S_OK with a list of (SE, [lfns]) tuples, or the error from _getFileSize()
    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    # Work on a copy, files are removed from it as they get assigned to tasks
    files = dict(files)
    nFiles = len(files)
    # Parameters
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
    flush = (status == 'Flush')
    self.logVerbose(
        "groupBySize: %d files, groupSize: %d, flush: %s" %
        (len(files), self.groupSize, flush))

    # Get the file sizes
    res = self._getFileSize(list(files))
    if not res['OK']:
      return res
    fileSizes = res['Value']

    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)

      for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
        lfnsInTasks = []
        for task in newTasks:
          lfnsInTasks += task[1]
        tasks += newTasks

        # Remove the selected files from the size cache
        self.clearCachedFileSize(lfnsInTasks)
        if not groupSE:
          # Remove files from other SEs; a set keeps the membership test cheap
          usedLfns = set(lfnsInTasks)
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop(lfn)

      self.logVerbose(
          "groupBySize: %d tasks created with groupSE %s" %
          (len(tasks) - nTasks, str(groupSE)))
      self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
      nTasks = len(tasks)

    # files now only holds what was left out, the grouped files are the difference
    self.logVerbose("Grouped %d files by size" % (nFiles - len(files)))
    return S_OK(tasks)

  def getExistingCounters(self, normalise=False, requestedSites=[]):
    res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                       {'TransformationID': self.params['TransformationID']})
    if not res['OK']:
      return res
    usageDict = {}
    for usedDict, count in res['Value']:
      usedSE = usedDict['UsedSE']
      if usedSE != 'Unknown':
        usageDict[usedSE] = count
    if requestedSites:
      siteDict = {}
      for se, count in usageDict.items():
        res = getSitesForSE(se)
        if not res['OK']:
          return res
        for site in res['Value']:
          if site in requestedSites:
            siteDict[site] = count
      usageDict = siteDict.copy()
    if normalise:
      usageDict = self._normaliseShares(usageDict)
    return S_OK(usageDict)

  # @timeThis
  def _getFileSize(self, lfns):
    """ Get the size of files from the cache, and from the catalog for those not cached

    :param lfns: LFNs to look up
    :returns: S_OK(dict LFN -> size), or the error returned by _getFileSizeFromCatalog()
    """
    requested = list(lfns)
    # Work on a snapshot of the cache
    cacheSnapshot = dict(self.cachedLFNSize)

    knownSizes = {lfn: cacheSnapshot[lfn] for lfn in requested if lfn in cacheSnapshot}
    self.logDebug(
        "Found cache hit for File size for %d files out of %d" %
        (len(knownSizes), len(requested)))

    missing = [lfn for lfn in requested if lfn not in cacheSnapshot]
    if not missing:
      return S_OK(knownSizes)

    res = self._getFileSizeFromCatalog(missing, knownSizes)
    if not res['OK']:
      self.logError(res['Message'])
      return res
    return S_OK(res['Value'])

  # @timeThis
  def _getFileSizeFromCatalog(self, lfns, fileSizes):
    """
    Get file sizes from the catalog and store them in the size cache

    :param lfns: LFNs to look up in the catalog
    :param dict fileSizes: sizes already known, it is copied and left untouched
    :returns: S_OK(dict) with fileSizes completed by the sizes found,
              S_ERROR when the catalog call fails
    """
    requested = list(lfns)
    allSizes = dict(fileSizes)

    res = self.fc.getFileSize(requested)
    if not res['OK']:
      return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])

    failed = res['Value']['Failed']
    if failed:
      # Report each distinct reason only once
      errorReason = sorted(set(failed.values()))
      self.logWarn("Failed to get sizes for %d files:" % len(failed), errorReason)

    found = res['Value']['Successful']
    allSizes.update(found)
    self.cachedLFNSize.update(found)
    self.logVerbose("Got size of %d files from catalog" % len(requested))
    return S_OK(allSizes)

  def clearCachedFileSize(self, lfns):
    """ Utility function
    """
    for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
      self.cachedLFNSize.pop(lfn)

  def getPluginParam(self, name, default=None):
    """ Get a plugin parameter, the most specific setting taking precedence:
        transformation parameter, then plugin-specific CS option, then generic CS option,
        then ``default``.
        Caution: the type returned is that of the default value

    :param str name: name of the parameter
    :param default: value used when the parameter is set nowhere, its type (when not None)
                    is the type the result is converted to
    :returns: the value of the parameter
    """
    # The target type comes from the caller's default, before any CS value replaces it
    targetType = type(default) if default is not None else None

    # First look at a generic value...
    genericPath = "TransformationPlugins/%s" % (name)
    csValue = Operations().getValue(genericPath, None)
    self.logVerbose("Default plugin param %s: '%s'" % (genericPath, csValue))

    # Then look at a plugin-specific value, falling back on the generic one
    specificPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
    csValue = Operations().getValue(specificPath, csValue)
    self.logVerbose("Specific plugin param %s: '%s'" % (specificPath, csValue))
    fallback = default if csValue is None else csValue

    # Finally look at a transformation-specific parameter
    value = self.params.get(name, fallback)
    self.logVerbose(
        "Transformation plugin param %s: '%s'. Convert to %s" %
        (name, value, str(targetType)))

    needsConversion = bool(targetType) and not isinstance(value, targetType)
    if needsConversion and targetType is list:
      if value and value != 'None':
        try:
          value = ast.literal_eval(value)
        # literal_eval('SE-DST') -> ValueError
        # literal_eval('SE_MC-DST') -> SyntaxError
        # Don't ask...
        except (ValueError, SyntaxError):
          # Not a Python literal: take it as a comma-separated list
          value = [val for val in value.replace(' ', '').split(',') if val]
      else:
        value = []
    elif needsConversion and targetType is int:
      value = int(value)
    elif needsConversion and targetType is float:
      value = float(value)
    elif needsConversion and targetType is bool:
      value = False if value in ('False', 'No', 'None', None, 0) else bool(value)
    elif needsConversion and targetType is not str:
      self.logWarn(
          "Unknown parameter type (%s) for %s, passed as string" %
          (str(targetType), name))
    self.logVerbose("Final plugin param %s: '%s'" % (name, value))
    return value

  @staticmethod
  def _normaliseShares(originalShares):
    """ Normalize shares to 1 """
    total = sum(float(share) for share in originalShares.values())
    return dict([(site, 100. * float(share) / total if total else 0.)
                 for site, share in originalShares.items()])

  def uniqueSEs(self, ses):
    """ return a list of SEs that are not physically the same """
    newSEs = []
    for se in ses:
      if not self.isSameSEInList(se, newSEs):
        newSEs.append(se)
    return newSEs

  def isSameSE(self, se1, se2):
    """ Check if 2 SEs are indeed the same.

        :param se1: name of the first StorageElement
        :param se2: name of the second StorageElement

        :returns: True/False if they are considered the same.
                  See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
    """
    if se1 == se2:
      return True

    return StorageElement(se1).isSameSE(StorageElement(se2))

  def isSameSEInList(self, se1, seList):
    """ Check if an SE is the same as any in a list """
    if se1 in seList:
      return True
    for se in seList:
      if self.isSameSE(se1, se):
        return True
    return False

  def closerSEs(self, existingSEs, targetSEs, local=False):
    """ Order the targetSEs such that the first ones are closer to existingSEs. Keep all elements in targetSEs
    """
    setTarget = set(targetSEs)
    sameSEs = set([se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2)])
    targetSEs = setTarget - set(sameSEs)
    if targetSEs:
      # Some SEs are left, look for sites
      existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value')
                       for se in existingSEs]
      existingSites = set([site for site in existingSites if site])
      closeSEs = set([se for se in targetSEs
                      if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
      # print existingSEs, existingSites, targetSEs, closeSEs
      otherSEs = targetSEs - closeSEs
      targetSEs = list(closeSEs)
      random.shuffle(targetSEs)
      if not local and otherSEs:
        otherSEs = list(otherSEs)
        random.shuffle(otherSEs)
        targetSEs += otherSEs
    else:
      targetSEs = []
    return (targetSEs + list(sameSEs)) if not local else targetSEs
Exemplo n.º 2
0
class PluginUtilities(object):
  """
  Utility class used by plugins
  """

  def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
               debug=False, transInThread=None, transID=None):
    """
    Build the helper, creating a default client for each one that was not supplied.

    :param str plugin: plugin name, the name of the sub-logger starts with it
    :param transClient: transformation client, a TransformationClient() is created when None
    :param dataManager: data manager, a DataManager() is created when None
    :param fc: file catalog, a FileCatalog() is created when None
    :param bool debug: when True, logVerbose() messages are emitted at info level
    :param dict transInThread: transformation ID -> string appended to the logger name,
                               an empty dict when None
    :param transID: transformation ID, overwritten by setParameters()
    """
    # Clients: keep what the caller gave, build a default otherwise
    self.transClient = TransformationClient() if transClient is None else transClient
    self.dm = DataManager() if dataManager is None else dataManager
    self.fc = FileCatalog() if fc is None else fc
    self.dmsHelper = DMSHelpers()

    # Identification of the plugin and of the transformation, used for logging
    self.plugin = plugin
    self.transID = transID
    self.transInThread = {} if transInThread is None else transInThread
    self.transString = ''
    self.debug = debug

    # Transformation parameters and values derived from them on first use
    self.params = {}
    self.groupSize = 0
    self.maxFiles = 0
    self.cachedLFNSize = {}

    # '%s' as transID may still be None at this stage
    threadInfo = self.transInThread.get(self.transID, ' [NoThread] [%s] ' % self.transID)
    self.log = gLogger.getSubLogger(self.plugin + threadInfo)
    # FIXME: This doesn't work (yet) but should soon, will allow scripts to get the context
    self.log.showHeaders(True)

  def logVerbose(self, message, param=''):
    """ Log at verbose level, or at info level through a '(V)' sub-logger when debug is set """
    if self.debug:
      # '%s' and not '%d', as in the constructor: the fallback string is built on every
      # call and transID is None until setParameters() is called
      threadInfo = self.transInThread.get(self.transID, ' [NoThread] [%s] ' % self.transID)
      log = gLogger.getSubLogger(self.plugin + ' (V)' + threadInfo)
      log.info(message, param)
    else:
      self.log.verbose(message, param)

  def logDebug(self, message, param=''):
    """ Log at debug level with the logger of the transformation """
    logger = self.log
    logger.debug(message, param)

  def logInfo(self, message, param=''):
    """ Log at info level with the logger of the transformation """
    logger = self.log
    logger.info(message, param)

  def logWarn(self, message, param=''):
    """ Log at warning level with the logger of the transformation """
    logger = self.log
    logger.warn(message, param)

  def logError(self, message, param=''):
    """ Log at error level with the logger of the transformation """
    logger = self.log
    logger.error(message, param)

  def logException(self, message, param='', lException=False):
    """ Forward to exception() of the logger of the transformation """
    logger = self.log
    logger.exception(message, param, lException)

  def setParameters(self, params):
    """ Store the transformation parameters, extract transID and rebuild the logger.

    :param dict params: transformation parameters, must contain 'TransformationID'
    """
    self.params = params
    self.transID = params['TransformationID']
    # String appended to the logger name when the transformation is not in transInThread
    fallback = ' [NoThread] [%d] ' % self.transID
    threadInfo = self.transInThread.get(self.transID, fallback)
    self.log = gLogger.getSubLogger(self.plugin + threadInfo)

  # @timeThis
  def groupByReplicas(self, files, status):
    """
    Generate tasks based on the location of the input data.

    The files are grouped twice with getFileGroups(): first with groupSE=True, then, for
    the files that are still left, with groupSE=False. Each group is cut in chunks of
    ``groupSize`` files; an incomplete chunk only becomes a task when flushing.

    :param files: file replicas, anything accepted by dict(), e.g.
              {'/this/is/at.1': ['SE1'],
               '/this/is/at.12': ['SE1', 'SE2'],
               '/this/is/at.2': ['SE2'],
               '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
               '/this/is/at_23': ['SE2', 'SE3'],
               '/this/is/at_4': ['SE4']}
    :param str status: 'Flush' forces incomplete chunks into tasks
    :returns: S_OK with a list of (SE, [lfns]) tuples
    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    # Work on a copy, files are removed from it as they get assigned to tasks
    files = dict(files)

    # Parameters
    if not self.groupSize:
      self.groupSize = self.getPluginParam('GroupSize', 10)
    flush = (status == 'Flush')
    self.logVerbose(
        "groupByReplicas: %d files, groupSize %d, flush %s" %
        (len(files), self.groupSize, flush))

    # Consider files by groups of SEs, a file is only in one group
    # Then consider files site by site, but a file can now be at more than one site
    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)
      self.logDebug("fileGroups set: ", seFiles)

      for replicaSE in sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        if not lfns:
          continue
        lfnsInTasks = []
        for taskLfns in breakListIntoChunks(lfns, self.groupSize):
          if flush or (len(taskLfns) >= self.groupSize):
            tasks.append((replicaSE, taskLfns))
            lfnsInTasks += taskLfns
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop(lfn)
        if not groupSE:
          # In case the file was at more than one site, remove it from the other sites' list.
          # A set keeps the membership test cheap for large groups.
          usedLfns = set(lfnsInTasks)
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
      self.logVerbose(
          "groupByReplicas: %d tasks created (groupSE %s)" %
          (len(tasks) - nTasks, str(groupSE)), "%d files not included in tasks" %
          len(files))
      nTasks = len(tasks)

    return S_OK(tasks)

  def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
    """
    Split files in groups according to the size and create tasks for a given SE
    """
    tasks = []
    if fileSizes is None:
      fileSizes = self._getFileSize(lfns).get('Value')
    if fileSizes is None:
      self.logWarn('Error getting file sizes, no tasks created')
      return tasks
    taskLfns = []
    taskSize = 0
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
    if not self.maxFiles:
      # FIXME: prepare for chaging the name of the ambiguoug  CS option
      self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
    lfns = sorted(lfns, key=fileSizes.get)
    for lfn in lfns:
      size = fileSizes.get(lfn, 0)
      if size:
        if size > self.groupSize:
          tasks.append((replicaSE, [lfn]))
        else:
          taskSize += size
          taskLfns.append(lfn)
          if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
            tasks.append((replicaSE, taskLfns))
            taskLfns = []
            taskSize = 0
    if flush and taskLfns:
      tasks.append((replicaSE, taskLfns))
    if not tasks and not flush and taskLfns:
      self.logVerbose(
          'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
          (taskSize, self.groupSize))
    return tasks

  # @timeThis
  def groupBySize(self, files, status):
    """
    Generate tasks holding a given amount of data.

    The files are grouped twice with getFileGroups(), first with groupSE=True and then,
    for the files still left, with groupSE=False. Each group is handed to
    createTasksBySize().

    :param files: file replicas, anything accepted by dict()
    :param str status: 'Flush' forces incomplete groups into tasks
    :returns: S_OK with a list of (SE, [lfns]) tuples, or the error from _getFileSize()
    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    # Work on a copy, files are removed from it as they get assigned to tasks
    files = dict(files)
    nFiles = len(files)
    # Parameters
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
    flush = (status == 'Flush')
    self.logVerbose(
        "groupBySize: %d files, groupSize: %d, flush: %s" %
        (len(files), self.groupSize, flush))

    # Get the file sizes
    res = self._getFileSize(list(files))
    if not res['OK']:
      return res
    fileSizes = res['Value']

    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)

      for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
        lfnsInTasks = []
        for task in newTasks:
          lfnsInTasks += task[1]
        tasks += newTasks

        # Remove the selected files from the size cache
        self.clearCachedFileSize(lfnsInTasks)
        if not groupSE:
          # Remove files from other SEs; a set keeps the membership test cheap
          usedLfns = set(lfnsInTasks)
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop(lfn)

      self.logVerbose(
          "groupBySize: %d tasks created with groupSE %s" %
          (len(tasks) - nTasks, str(groupSE)))
      self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
      nTasks = len(tasks)

    # files now only holds what was left out, the grouped files are the difference
    self.logVerbose("Grouped %d files by size" % (nFiles - len(files)))
    return S_OK(tasks)

  def getExistingCounters(self, normalise=False, requestedSites=[]):
    res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                       {'TransformationID': self.params['TransformationID']})
    if not res['OK']:
      return res
    usageDict = {}
    for usedDict, count in res['Value']:
      usedSE = usedDict['UsedSE']
      if usedSE != 'Unknown':
        usageDict[usedSE] = count
    if requestedSites:
      siteDict = {}
      for se, count in usageDict.items():
        res = getSitesForSE(se)
        if not res['OK']:
          return res
        for site in res['Value']:
          if site in requestedSites:
            siteDict[site] = count
      usageDict = siteDict.copy()
    if normalise:
      usageDict = self._normaliseShares(usageDict)
    return S_OK(usageDict)

  # @timeThis
  def _getFileSize(self, lfns):
    """ Get the size of files from the cache, and from the catalog for those not cached

    :param lfns: LFNs to look up
    :returns: S_OK(dict LFN -> size), or the error returned by _getFileSizeFromCatalog()
    """
    requested = list(lfns)
    # Work on a snapshot of the cache
    cacheSnapshot = dict(self.cachedLFNSize)

    knownSizes = {lfn: cacheSnapshot[lfn] for lfn in requested if lfn in cacheSnapshot}
    self.logDebug(
        "Found cache hit for File size for %d files out of %d" %
        (len(knownSizes), len(requested)))

    missing = [lfn for lfn in requested if lfn not in cacheSnapshot]
    if not missing:
      return S_OK(knownSizes)

    res = self._getFileSizeFromCatalog(missing, knownSizes)
    if not res['OK']:
      self.logError(res['Message'])
      return res
    return S_OK(res['Value'])

  # @timeThis
  def _getFileSizeFromCatalog(self, lfns, fileSizes):
    """
    Get file sizes from the catalog and store them in the size cache

    :param lfns: LFNs to look up in the catalog
    :param dict fileSizes: sizes already known, it is copied and left untouched
    :returns: S_OK(dict) with fileSizes completed by the sizes found,
              S_ERROR when the catalog call fails
    """
    requested = list(lfns)
    allSizes = dict(fileSizes)

    res = self.fc.getFileSize(requested)
    if not res['OK']:
      return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])

    failed = res['Value']['Failed']
    if failed:
      # Report each distinct reason only once
      errorReason = sorted(set(failed.values()))
      self.logWarn("Failed to get sizes for %d files:" % len(failed), errorReason)

    found = res['Value']['Successful']
    allSizes.update(found)
    self.cachedLFNSize.update(found)
    self.logVerbose("Got size of %d files from catalog" % len(requested))
    return S_OK(allSizes)

  def clearCachedFileSize(self, lfns):
    """ Utility function
    """
    for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
      self.cachedLFNSize.pop(lfn)

  def getPluginParam(self, name, default=None):
    """ Get a plugin parameter, the most specific setting taking precedence:
        transformation parameter, then plugin-specific CS option, then generic CS option,
        then ``default``.
        Caution: the type returned is that of the default value

    :param str name: name of the parameter
    :param default: value used when the parameter is set nowhere, its type (when not None)
                    is the type the result is converted to
    :returns: the value of the parameter
    """
    # The target type comes from the caller's default, before any CS value replaces it
    targetType = type(default) if default is not None else None

    # First look at a generic value...
    genericPath = "TransformationPlugins/%s" % (name)
    csValue = Operations().getValue(genericPath, None)
    self.logVerbose("Default plugin param %s: '%s'" % (genericPath, csValue))

    # Then look at a plugin-specific value, falling back on the generic one
    specificPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
    csValue = Operations().getValue(specificPath, csValue)
    self.logVerbose("Specific plugin param %s: '%s'" % (specificPath, csValue))
    fallback = default if csValue is None else csValue

    # Finally look at a transformation-specific parameter
    value = self.params.get(name, fallback)
    self.logVerbose(
        "Transformation plugin param %s: '%s'. Convert to %s" %
        (name, value, str(targetType)))

    needsConversion = bool(targetType) and not isinstance(value, targetType)
    if needsConversion and targetType is list:
      if value and value != 'None':
        try:
          value = ast.literal_eval(value)
        # literal_eval('SE-DST') -> ValueError
        # literal_eval('SE_MC-DST') -> SyntaxError
        # Don't ask...
        except (ValueError, SyntaxError):
          # Not a Python literal: take it as a comma-separated list
          value = [val for val in value.replace(' ', '').split(',') if val]
      else:
        value = []
    elif needsConversion and targetType is int:
      value = int(value)
    elif needsConversion and targetType is float:
      value = float(value)
    elif needsConversion and targetType is bool:
      value = False if value in ('False', 'No', 'None', None, 0) else bool(value)
    elif needsConversion and targetType is not str:
      self.logWarn(
          "Unknown parameter type (%s) for %s, passed as string" %
          (str(targetType), name))
    self.logVerbose("Final plugin param %s: '%s'" % (name, value))
    return value

  @staticmethod
  def _normaliseShares(originalShares):
    """ Normalize shares to 1 """
    total = sum(float(share) for share in originalShares.values())
    return dict([(site, 100. * float(share) / total if total else 0.)
                 for site, share in originalShares.items()])

  def uniqueSEs(self, ses):
    """ return a list of SEs that are not physically the same """
    newSEs = []
    for se in ses:
      if not self.isSameSEInList(se, newSEs):
        newSEs.append(se)
    return newSEs

  def isSameSE(self, se1, se2):
    """ Check if 2 SEs are indeed the same.

        :param se1: name of the first StorageElement
        :param se2: name of the second StorageElement

        :returns: True/False if they are considered the same.
                  See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
    """
    if se1 == se2:
      return True

    return StorageElement(se1).isSameSE(StorageElement(se2))

  def isSameSEInList(self, se1, seList):
    """ Check if an SE is the same as any in a list """
    if se1 in seList:
      return True
    for se in seList:
      if self.isSameSE(se1, se):
        return True
    return False

  def closerSEs(self, existingSEs, targetSEs, local=False):
    """ Order the targetSEs such that the first ones are closer to existingSEs. Keep all elements in targetSEs
    """
    setTarget = set(targetSEs)
    sameSEs = set([se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2)])
    targetSEs = setTarget - set(sameSEs)
    if targetSEs:
      # Some SEs are left, look for sites
      existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value')
                       for se in existingSEs]
      existingSites = set([site for site in existingSites if site])
      closeSEs = set([se for se in targetSEs
                      if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
      # print existingSEs, existingSites, targetSEs, closeSEs
      otherSEs = targetSEs - closeSEs
      targetSEs = list(closeSEs)
      random.shuffle(targetSEs)
      if not local and otherSEs:
        otherSEs = list(otherSEs)
        random.shuffle(otherSEs)
        targetSEs += otherSEs
    else:
      targetSEs = []
    return (targetSEs + list(sameSEs)) if not local else targetSEs

  @staticmethod
  def seParamtoList(inputParam):
    """Transform ``inputParam`` to list.

    :param inputParam: can be string, list, or string representation of list
    :returns: list
    """
    if not inputParam:
      return []
    if inputParam.count('['):
      return eval(inputParam)  # pylint: disable=eval-used
    elif isinstance(inputParam, list):
      return inputParam
    return [inputParam]
Exemplo n.º 3
0
class PluginUtilities(object):
    """
  Utility class used by plugins
  """
    def __init__(self,
                 plugin='Standard',
                 transClient=None,
                 dataManager=None,
                 fc=None,
                 debug=False,
                 transInThread=None,
                 transID=None):
        """
        Constructor: store the clients and set the defaults.

        :param str plugin: plugin name, also used to name the sub-logger
        :param transClient: transformation client; a TransformationClient is created if None
        :param dataManager: data manager; a DataManager is created if None
        :param fc: file catalog; a FileCatalog is created if None
        :param bool debug: if True, verbose messages are logged at info level
        :param dict transInThread: transformation ID -> log prefix; a new dict is used if None
        :param transID: transformation ID
        """
        # Clients: a default one is only built when the caller supplied none
        self.transClient = TransformationClient() if transClient is None else transClient
        self.dm = DataManager() if dataManager is None else dataManager
        self.fc = FileCatalog() if fc is None else fc
        self.dmsHelper = DMSHelpers()

        # Identification of the plugin and of the transformation
        self.plugin = plugin
        self.transID = transID
        self.transString = ''
        self.debug = debug
        self.transInThread = {} if transInThread is None else transInThread

        # Working state
        self.params = {}
        self.groupSize = 0
        self.maxFiles = 0
        self.cachedLFNSize = {}
        self.seConfig = {}

        self.log = gLogger.getSubLogger("%s/PluginUtilities" % plugin)

    def logVerbose(self, message, param=''):
        if self.debug:
            self.log.info('(V)' + self.transString + message, param)
        else:
            self.log.verbose(self.transString + message, param)

    def logDebug(self, message, param=''):
        self.log.debug(self.transString + message, param)

    def logInfo(self, message, param=''):
        self.log.info(self.transString + message, param)

    def logWarn(self, message, param=''):
        self.log.warn(self.transString + message, param)

    def logError(self, message, param=''):
        self.log.error(self.transString + message, param)

    def logException(self, message, param='', lException=False):
        self.log.exception(self.transString + message, param, lException)

    def setParameters(self, params):
        self.params = params
        self.transID = params['TransformationID']
        self.transString = self.transInThread.get(
            self.transID,
            ' [NoThread] [%d] ' % self.transID) + '%s: ' % self.plugin

    @timeThis
    def groupByReplicas(self, files, status):
        """
        Generates tasks based on the location of the input data

        :param files: replicas of the files, anything accepted by ``dict()``, e.g.
              {'/this/is/at.1': ['SE1'],
               '/this/is/at.12': ['SE1', 'SE2'],
               '/this/is/at.2': ['SE2'],
               '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
               '/this/is/at_23': ['SE2', 'SE3'],
               '/this/is/at_4': ['SE4']}
        :param str status: transformation status; with 'Flush', incomplete groups also
                           become tasks in the second (site by site) pass
        :returns: S_OK with a list of (replicaSE, lfns) tuples
        """
        tasks = []
        nTasks = 0

        if not files:
            return S_OK(tasks)

        # Work on a copy: the entries are removed as the files get assigned to tasks
        files = dict(files)

        # Parameters
        if not self.groupSize:
            self.groupSize = self.getPluginParam('GroupSize', 10)
        flush = (status == 'Flush')
        self.logVerbose("groupByReplicas: %d files, groupSize %d, flush %s" %
                        (len(files), self.groupSize, flush))

        # Consider files by groups of SEs, a file is only in one group
        # Then consider files site by site, but a file can now be at more than one site
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            self.logDebug("fileGroups set: ", seFiles)

            for replicaSE in sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                if not lfns:
                    continue
                # A set makes the membership tests below cheap
                lfnsInTasks = set()
                for taskLfns in breakListIntoChunks(lfns, self.groupSize):
                    # Only full groups make a task, unless flushing in the site by site pass
                    if (flush and not groupSE) or len(taskLfns) >= self.groupSize:
                        tasks.append((replicaSE, taskLfns))
                        lfnsInTasks.update(taskLfns)
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)
                if not groupSE:
                    # In case the file was at more than one site, remove it from the other sites' list
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
            self.logVerbose(
                "groupByReplicas: %d tasks created (groupSE %s), %d files not included in tasks"
                % (len(tasks) - nTasks, str(groupSE), len(files)))
            nTasks = len(tasks)

        return S_OK(tasks)

    def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
        """
    Split files in groups according to the size and create tasks for a given SE
    """
        tasks = []
        if fileSizes is None:
            fileSizes = self._getFileSize(lfns).get('Value')
        if fileSizes is None:
            self.logWarn('Error getting file sizes, no tasks created')
            return tasks
        taskLfns = []
        taskSize = 0
        if not self.groupSize:
            self.groupSize = float(
                self.getPluginParam('GroupSize', 1.)
            ) * 1000 * 1000 * 1000  # input size in GB converted to bytes
        if not self.maxFiles:
            self.maxFiles = self.getPluginParam('MaxFiles', 100)
        lfns = sorted(lfns, key=fileSizes.get)
        for lfn in lfns:
            size = fileSizes.get(lfn, 0)
            if size:
                if size > self.groupSize:
                    tasks.append((replicaSE, [lfn]))
                else:
                    taskSize += size
                    taskLfns.append(lfn)
                    if (taskSize > self.groupSize) or (len(taskLfns) >=
                                                       self.maxFiles):
                        tasks.append((replicaSE, taskLfns))
                        taskLfns = []
                        taskSize = 0
        if flush and taskLfns:
            tasks.append((replicaSE, taskLfns))
        return tasks

    @timeThis
    def groupBySize(self, files, status):
        """
        Generate a task for a given amount of data

        :param files: replicas of the files, anything accepted by ``dict()`` with LFNs as keys
        :param str status: transformation status; with 'Flush', incomplete groups also become tasks
        :returns: S_OK with a list of (replicaSE, lfns) tuples, or the error from the size lookup
        """
        tasks = []
        nTasks = 0

        if not files:
            return S_OK(tasks)

        # Work on a copy: the entries are removed as the files get assigned to tasks
        files = dict(files)
        nFiles = len(files)
        # Parameters
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
        flush = (status == 'Flush')
        self.logVerbose("groupBySize: %d files, groupSize: %d, flush: %s" %
                        (len(files), self.groupSize, flush))

        # Get the file sizes
        res = self._getFileSize(files.keys())
        if not res['OK']:
            return res
        fileSizes = res['Value']

        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)

            for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                newTasks = self.createTasksBySize(lfns,
                                                  replicaSE,
                                                  fileSizes=fileSizes,
                                                  flush=flush)
                # A set makes the membership tests below cheap
                lfnsInTasks = set()
                for _se, taskLfns in newTasks:
                    lfnsInTasks.update(taskLfns)
                tasks += newTasks

                # Remove the selected files from the size cache
                self.clearCachedFileSize(lfnsInTasks)
                if not groupSE:
                    # Remove files from other SEs
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)

            self.logVerbose("groupBySize: %d tasks created with groupSE %s" %
                            (len(tasks) - nTasks, str(groupSE)))
            self.logVerbose(
                "groupBySize: %d files have not been included in tasks" %
                len(files))
            nTasks = len(tasks)

        # ``files`` now only holds the leftovers: report what was actually put in tasks
        self.logVerbose("Grouped %d files by size" % (nFiles - len(files)))
        return S_OK(tasks)

    def getExistingCounters(self, normalise=False, requestedSites=[]):
        res = self.transClient.getCounters(
            'TransformationFiles', ['UsedSE'],
            {'TransformationID': self.params['TransformationID']})
        if not res['OK']:
            return res
        usageDict = {}
        for usedDict, count in res['Value']:
            usedSE = usedDict['UsedSE']
            if usedSE != 'Unknown':
                usageDict[usedSE] = count
        if requestedSites:
            siteDict = {}
            for se, count in usageDict.items():
                res = getSitesForSE(se)
                if not res['OK']:
                    return res
                for site in res['Value']:
                    if site in requestedSites:
                        siteDict[site] = count
            usageDict = siteDict.copy()
        if normalise:
            usageDict = self._normaliseShares(usageDict)
        return S_OK(usageDict)

    @timeThis
    def _getFileSize(self, lfns):
        """ Get the file sizes, from the cache when present, otherwise from the catalog

        :param lfns: LFNs whose size is wanted
        :returns: S_OK(dict lfn -> size), or the error returned by the catalog lookup
        """
        lfns = list(lfns)
        # Snapshot of the cache, so that hits and misses are computed on the same content
        cache = dict(self.cachedLFNSize)

        known = dict((lfn, cache[lfn]) for lfn in lfns if lfn in cache)
        self.logDebug("Found cache hit for File size for %d files out of %d" %
                      (len(known), len(lfns)))
        missing = [lfn for lfn in lfns if lfn not in cache]
        if not missing:
            return S_OK(known)
        res = self._getFileSizeFromCatalog(missing, known)
        if not res['OK']:
            self.logError(res['Message'])
            return res
        return S_OK(res['Value'])

    @timeThis
    def _getFileSizeFromCatalog(self, lfns, fileSizes):
        """
        Get file sizes from the catalog and add them to the sizes already known

        The sizes obtained are also stored in ``self.cachedLFNSize``.

        :param lfns: LFNs to look up with ``self.fc.getFileSize``
        :param dict fileSizes: sizes already known; it is copied, not modified
        :returns: S_OK(dict lfn -> size) or S_ERROR if the catalog call fails
        """
        wanted = list(lfns)
        sizes = dict(fileSizes)

        res = self.fc.getFileSize(wanted)
        if not res['OK']:
            return S_ERROR("Failed to get sizes for all files: %s" %
                           res['Message'])
        failed = res['Value']['Failed']
        if failed:
            # Report each distinct failure reason only once
            reasons = sorted(set(failed.values()))
            self.logWarn("Failed to get sizes for %d files:" % len(failed), reasons)
        successful = res['Value']['Successful']
        sizes.update(successful)
        self.cachedLFNSize.update(successful)
        self.logVerbose("Got size of %d files from catalog" % len(wanted))
        return S_OK(sizes)

    def clearCachedFileSize(self, lfns):
        """ Utility function
    """
        for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
            self.cachedLFNSize.pop(lfn)

    def getPluginParam(self, name, default=None):
        """ Get plugin parameters using specific settings or settings defined in the CS

        The value is looked up in turn, each level overriding the previous one, in:

          1. the generic option ``TransformationPlugins/<name>``
          2. the plugin-specific option ``TransformationPlugins/<plugin>/<name>``
          3. the transformation parameters (``self.params``)

        Caution: the type returned is that of the default value

        :param str name: name of the parameter
        :param default: value used if the parameter is defined nowhere; when not None,
                        its type drives the conversion of the value found
        :returns: the value, converted for list, int, float and bool defaults
        """
        # Identity tests: a value of a type overriding != must not be mistaken for None
        valueType = type(default) if default is not None else None
        # First look at a generic value...
        optionPath = "TransformationPlugins/%s" % (name)
        value = Operations().getValue(optionPath, None)
        self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
        # Then look at a plugin-specific value
        optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
        value = Operations().getValue(optionPath, value)
        self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
        if value is not None:
            default = value
        # Finally look at a transformation-specific parameter
        value = self.params.get(name, default)
        self.logVerbose("Transformation plugin param %s: '%s'. Convert to %s" %
                        (name, value, str(valueType)))
        if valueType and type(value) is not valueType:
            if valueType is list:
                try:
                    value = ast.literal_eval(value) if value and value != 'None' else []
                except Exception:
                    # Not a Python literal: take it as a comma-separated list
                    value = [val for val in value.replace(' ', '').split(',') if val]
            elif valueType is int:
                value = int(value)
            elif valueType is float:
                value = float(value)
            elif valueType is bool:
                # Only these exact spellings mean False, any other non-empty string is True
                if value in ('False', 'No', 'None', None, 0):
                    value = False
                else:
                    value = bool(value)
            elif valueType is not str:
                self.logWarn(
                    "Unknown parameter type (%s) for %s, passed as string" %
                    (str(valueType), name))
        self.logVerbose("Final plugin param %s: '%s'" % (name, value))
        return value

    @staticmethod
    def _normaliseShares(originalShares):
        shares = originalShares.copy()
        total = 0.0
        for site in shares.keys():
            share = float(shares[site])
            shares[site] = share
            total += share
        for site in shares.keys():
            share = 100.0 * (shares[site] / total)
            shares[site] = share
        return shares

    def uniqueSEs(self, ses):
        newSEs = []
        for se in ses:
            if not self.isSameSEInList(se, newSEs):
                newSEs.append(se)
        return newSEs

    def isSameSE(self, se1, se2):
        if se1 == se2:
            return True
        for se in (se1, se2):
            if se not in self.seConfig:
                self.seConfig[se] = {}
                res = StorageElement(se).getStorageParameters('SRM2')
                if res['OK']:
                    params = res['Value']
                    for item in ('Host', 'Path'):
                        self.seConfig[se][item] = params[item].replace(
                            't1d1', 't0d1')
                else:
                    self.logError(
                        "Error getting StorageElement parameters for %s" % se,
                        res['Message'])

        return self.seConfig[se1] == self.seConfig[se2]

    def isSameSEInList(self, se1, seList):
        if se1 in seList:
            return True
        for se in seList:
            if self.isSameSE(se1, se):
                return True
        return False

    def closerSEs(self, existingSEs, targetSEs, local=False):
        """ Order the targetSEs such that the first ones are closer to existingSEs. Keep all elements in targetSEs
    """
        setTarget = set(targetSEs)
        sameSEs = set([
            se1 for se1 in setTarget for se2 in existingSEs
            if self.isSameSE(se1, se2)
        ])
        targetSEs = setTarget - set(sameSEs)
        if targetSEs:
            # Some SEs are left, look for sites
            existingSites = [
                self.dmsHelper.getLocalSiteForSE(se).get('Value')
                for se in existingSEs if not self.dmsHelper.isSEArchive(se)
            ]
            existingSites = set([site for site in existingSites if site])
            closeSEs = set([
                se for se in targetSEs if self.dmsHelper.getLocalSiteForSE(
                    se).get('Value') in existingSites
            ])
            # print existingSEs, existingSites, targetSEs, closeSEs
            otherSEs = targetSEs - closeSEs
            targetSEs = list(closeSEs)
            random.shuffle(targetSEs)
            if not local and otherSEs:
                otherSEs = list(otherSEs)
                random.shuffle(otherSEs)
                targetSEs += otherSEs
        else:
            targetSEs = []
        return (targetSEs + list(sameSEs)) if not local else targetSEs
class TransformationPlugin( PluginBase ):
  """ A TransformationPlugin object should be instantiated by every transformation.
  """

  def __init__(self, plugin, transClient=None, dataManager=None):
    """ plugin name has to be passed in: it will then be executed as one of the functions below, e.g.
        plugin = 'BySize' will execute TransformationPlugin('BySize')._BySize()

    :param plugin: plugin name, handed over to the base class constructor
    :param transClient: transformation client; a TransformationClient is created if None
    :param dataManager: data manager; a DataManager is created if None
    """
    super(TransformationPlugin, self).__init__(plugin)

    self.data = {}
    self.files = False
    # Default clients are only built when the caller supplied none
    self.transClient = TransformationClient() if transClient is None else transClient
    self.dm = DataManager() if dataManager is None else dataManager
    self.fc = FileCatalog()


  def isOK( self ):
    self.valid = True
    if ( not self.data ) or ( not self.params ):
      self.valid = False
    return self.valid

  def setInputData( self, data ):
    self.data = data

  def setTransformationFiles( self, files ): #TODO ADDED
    self.files = files

  def _Standard( self ):
    """ Simply group by replica location
    """
    res = self._groupByReplicas()
    if not res['OK']:
      return res
    newTasks = []
    for _se, lfns in res['Value']:
      newTasks.append( ( '', lfns ) )
    return S_OK( newTasks )

  def _BySize( self ):
    """ Alias for groupBySize
    """
    return self._groupBySize()

  def _Broadcast(self):
    """ This plug-in takes files found at the sourceSE and broadcasts to all (or a selection of) targetSEs.

    Parameters read from self.params:

      * 'SourceSE': string representation of the list of source SEs
      * 'TargetSE': an SE name, a list of SE names or the string representation of such a list
      * 'Destinations' (optional): number of destinations; 0, or a number not smaller than
        the number of target SEs, means no limit
      * 'Status': with 'Flush', groups smaller than the group size also make a task
      * 'GroupSize': number of files per task

    :returns: S_OK with a list of (comma separated target SEs, lfns) tuples,
              S_ERROR if there are no parameters
    """
    if not self.params:
      return S_ERROR("TransformationPlugin._Broadcast: The 'Broadcast' plugin requires additional parameters.")

    targetseParam = self.params['TargetSE']
    # NOTE(review): eval executes whatever expression the parameters contain; if they can
    # come from an untrusted source, ast.literal_eval should be used instead
    sourceSEs = eval(self.params['SourceSE'])
    # Test the type first: list.count('[') counts elements, it does not look for a bracket
    if isinstance(targetseParam, list):
      # The list is shuffled below: copy it to leave self.params['TargetSE'] untouched
      targetSEs = list(targetseParam)
    elif targetseParam.count('['):
      targetSEs = eval(targetseParam)
    else:
      targetSEs = [targetseParam]
    destinations = int(self.params.get('Destinations', 0))
    if destinations and (destinations >= len(targetSEs)):
      destinations = 0

    status = self.params['Status']
    groupSize = self.params['GroupSize']  # Number of files per tasks

    fileGroups = self._getFileGroups(self.data)  # groups by SE
    targetSELfns = {}
    for replicaSE, lfns in fileGroups.items():
      ses = replicaSE.split(',')
      # Only files having a replica at one of the source SEs are considered
      if not any(se in sourceSEs for se in ses):
        continue

      for lfn in lfns:
        targets = []
        sources = self._getSitesForSEs(ses)
        random.shuffle(targetSEs)
        for targetSE in targetSEs:
          site = self._getSiteForSE(targetSE)['Value']
          if site not in sources:
            if destinations and len(targets) >= destinations:
              continue
            sources.append(site)
          # after all, if someone wants to copy to the source, it's his choice
          targets.append(targetSE)
        strTargetSEs = ','.join(sorted(targets))
        targetSELfns.setdefault(strTargetSEs, []).append(lfn)
    tasks = []
    for ses, lfns in targetSELfns.items():
      for taskLfns in breakListIntoChunks(lfns, groupSize):
        # do not allow groups smaller than the groupSize, except if transformation is in flush state
        if status == 'Flush' or len(taskLfns) >= int(groupSize):
          tasks.append((ses, taskLfns))
    return S_OK(tasks)

  def _ByShare(self, shareType='CPU'):
    """ first get the shares from the CS, and then makes the grouping looking at the history

    :param str shareType: type of shares, passed to _getShares
    :returns: S_OK with a list of (SE, lfns) tuples, or the error of the failing step
    """
    res = self._getShares(shareType, normalise=True)
    if not res['OK']:
      return res
    cpuShares = res['Value']
    gLogger.info("Obtained the following target shares (%):")
    for site in sorted(cpuShares.keys()):
      gLogger.info("%s: %.1f" % (site.ljust(15), cpuShares[site]))

    # Get the existing destinations from the transformationDB
    res = self._getExistingCounters(requestedSites=cpuShares.keys())
    if not res['OK']:
      gLogger.error("Failed to get existing file share", res['Message'])
      return res
    existingCount = res['Value']
    if existingCount:
      gLogger.info("Existing site utilization (%):")
      normalisedExistingCount = self._normaliseShares(existingCount.copy())
      for se in sorted(normalisedExistingCount.keys()):
        gLogger.info("%s: %.1f" % (se.ljust(15), normalisedExistingCount[se]))

    # Group the input files by their existing replicas
    res = self._groupByReplicas()
    if not res['OK']:
      return res
    replicaGroups = res['Value']

    tasks = []
    # For the replica groups
    for replicaSE, lfns in replicaGroups:
      possibleSEs = replicaSE.split(',')
      # Determine the next site based on requested shares, existing usage and candidate sites
      res = self._getNextSite(existingCount, cpuShares, candidates=self._getSitesForSEs(possibleSEs))
      if not res['OK']:
        gLogger.error("Failed to get next destination SE", res['Message'])
        continue
      targetSite = res['Value']
      # Resolve the ses for the target site
      res = getSEsForSite(targetSite)
      if not res['OK']:
        continue
      ses = res['Value']
      # Determine the selected SE and create the task
      for chosenSE in ses:
        if chosenSE in possibleSEs:
          tasks.append((chosenSE, lfns))
          # Account for the new files, so that the next choice sees the updated usage
          existingCount[targetSite] = existingCount.get(targetSite, 0) + len(lfns)
    return S_OK(tasks)

  def _getShares(self, shareType, normalise=False):
    """ Takes share from the CS, eventually normalize them

    :param str shareType: name of the section below /Resources/Shares
    :param bool normalise: if True, the shares are passed through _normaliseShares
    :returns: S_OK(dict site -> float share) or an error
    """
    res = gConfig.getOptionsDict('/Resources/Shares/%s' % shareType)
    if not res['OK']:
      return res
    shares = res['Value']
    if not shares:
      return S_ERROR("/Resources/Shares/%s option contains no shares" % shareType)
    # The values are converted in place
    for site in list(shares):
      shares[site] = float(shares[site])
    if normalise:
      shares = self._normaliseShares(shares)
    if shares:
      return S_OK(shares)
    return S_ERROR("No non-zero shares defined")

  def _getExistingCounters( self, normalise = False, requestedSites = [] ):
    res = self.transClient.getCounters( 'TransformationFiles', ['UsedSE'],
                                        {'TransformationID':self.params['TransformationID']} )
    if not res['OK']:
      return res
    usageDict = {}
    for usedDict, count in res['Value']:
      usedSE = usedDict['UsedSE']
      if usedSE != 'Unknown':
        usageDict[usedSE] = count
    if requestedSites:
      siteDict = {}
      for se, count in usageDict.items():
        res = getSitesForSE( se, gridName = 'LCG' )
        if not res['OK']:
          return res
        for site in res['Value']:
          if site in requestedSites:
            siteDict[site] = count
      usageDict = siteDict.copy()
    if normalise:
      usageDict = self._normaliseShares( usageDict )
    return S_OK( usageDict )

  @classmethod
  def _normaliseShares( self, originalShares ):
    shares = originalShares.copy()
    total = 0.0
    for site in shares.keys():
      share = float( shares[site] )
      shares[site] = share
      total += share
    for site in shares.keys():
      share = 100.0 * ( shares[site] / total )
      shares[site] = share
    return shares

  def _getNextSite(self, existingCount, cpuShares, candidates=None):
    """ Choose the site whose existing share is the furthest below its requested share

    :param dict existingCount: site -> number of files already assigned
    :param dict cpuShares: site -> requested share; sites with a zero share are never chosen
    :param candidates: if not empty, only these sites can be chosen
    :returns: S_OK(site name), the name being '' if no site qualifies
    """
    # normalise the shares
    siteShare = self._normaliseShares(existingCount)
    # then fill the missing share values to 0
    for site in cpuShares:
      siteShare.setdefault(site, 0.0)
    # determine which site is furthest from its share
    chosenSite = ''
    largestShortFall = -float("inf")
    for site, cpuShare in cpuShares.items():
      if candidates and site not in candidates:
        continue
      if not cpuShare:
        continue
      shareShortFall = cpuShare - siteShare[site]
      if shareShortFall > largestShortFall:
        largestShortFall = shareShortFall
        chosenSite = site
    return S_OK(chosenSite)

  def _groupByReplicas(self):
    """ Generates tasks based on the location of the input data

    The files in self.data are grouped by set of replica SEs, and each group is cut in
    chunks of 'GroupSize' files. Chunks smaller than the group size only make a task
    when the 'Status' parameter is 'Flush'.

    :returns: S_OK with a list of (replicaSE, lfns) tuples, S_ERROR if there are no parameters
    """
    if not self.params:
      return S_ERROR("TransformationPlugin._Standard: The 'Standard' plug-in requires parameters.")
    flush = self.params['Status'] == 'Flush'
    groupSize = self.params['GroupSize']
    # Group files by SE
    fileGroups = self._getFileGroups(self.data)
    # Create tasks based on the group size
    tasks = []
    for replicaSE in sorted(fileGroups):
      for chunk in breakListIntoChunks(fileGroups[replicaSE], groupSize):
        if flush or len(chunk) >= int(groupSize):
          tasks.append((replicaSE, chunk))
    return S_OK(tasks)

  def _groupBySize(self):
    """ Generate a task for a given amount of data

    The files in self.data are grouped by set of replica SEs. Within a group the files are
    accumulated until their size exceeds 'GroupSize' (in GB) or 'MaxFiles' files (100 by
    default) are collected. What is left of a group only makes a task when the 'Status'
    parameter is 'Flush'.

    :returns: S_OK with a list of (replicaSE, lfns) tuples, S_ERROR if there are no parameters
              or if the size of any file cannot be obtained
    """
    if not self.params:
      return S_ERROR("TransformationPlugin._BySize: The 'BySize' plug-in requires parameters.")
    status = self.params['Status']
    # input size in GB converted to bytes
    requestedSize = float(self.params['GroupSize']) * 1000 * 1000 * 1000
    maxFiles = self.params.get('MaxFiles', 100)
    # Group files by SE
    fileGroups = self._getFileGroups(self.data)
    # Get the file sizes
    res = self.fc.getFileSize(self.data)
    if not res['OK']:
      return S_ERROR("Failed to get sizes for files")
    if res['Value']['Failed']:
      return S_ERROR("Failed to get sizes for all files")
    fileSizes = res['Value']['Successful']
    flush = (status == 'Flush')
    tasks = []
    for replicaSE, lfns in fileGroups.items():
      pending = []
      pendingSize = 0
      for lfn in lfns:
        pendingSize += fileSizes[lfn]
        pending.append(lfn)
        if pendingSize > requestedSize or len(pending) >= maxFiles:
          tasks.append((replicaSE, pending))
          pending = []
          pendingSize = 0
      if flush and pending:
        tasks.append((replicaSE, pending))
    return S_OK(tasks)

  @classmethod
  def _getFileGroups( cls, fileReplicas ):
    """ get file groups dictionary { "SE1,SE2,SE3" : [ lfn1, lfn2 ], ... }
    
    :param dict fileReplicas: { lfn : [SE1, SE2, SE3], ... }
    """
    fileGroups = {}
    for lfn, replicas in fileReplicas.items():
      replicaSEs = ",".join( sorted( list( set( replicas ) ) ) )
      if replicaSEs not in fileGroups:
        fileGroups[replicaSEs] = []
      fileGroups[replicaSEs].append( lfn )
    return fileGroups

  @classmethod
  def _getSiteForSE(cls, se):
    """ Get site name for the given SE

    :param se: SE name
    :returns: S_OK(first site returned by getSitesForSE, or '' if there is none),
              or the error from getSitesForSE
    """
    result = getSitesForSE(se, gridName='LCG')
    if result['OK']:
      sites = result['Value']
      result = S_OK(sites[0] if sites else '')
    return result

  @classmethod
  def _getSitesForSEs( cls, seList ):
    """ Get all the sites for the given SE list
    """
    sites = []
    for se in seList:
      result = getSitesForSE( se, gridName = 'LCG' )
      if result['OK']:
        sites += result['Value']
    return sites
Exemplo n.º 5
0
class fakeClient:
    def __init__(self, trans, transID, lfns, asIfProd):
        """
        Store the transformation under test and build the real clients.

        :param trans: transformation object; its getType() and getBkQuery()
                      are used by the other methods of this class
        :param transID: ID under which the tested transformation is known
        :param lfns: LFNs handed to prepareForPlugin()
        :param asIfProd: when true-ish, requests for transID are redirected
                         to this transformation ID by the other methods
        """
        # Imports are kept local to the constructor, as in the original code
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager

        self.trans = trans
        self.transID = transID
        self.asIfProd = asIfProd

        self.transClient = TransformationClient()
        self.bk = BookkeepingClient()
        self.dm = DataManager()

        # Needs self.trans, self.bk and self.dm, hence called last
        self.transFiles, self.transReplicas = self.prepareForPlugin(lfns)

    def addFilesToTransformation(self, transID, lfns):
        """
        Report every LFN as added; transID is not used.

        :return: S_OK({'Failed': {}, 'Successful': {lfn: 'Added', ...}})
        """
        added = dict.fromkeys(lfns, 'Added')
        return S_OK({'Failed': {}, 'Successful': added})

    def getTransformation(self, transID, extraParams=False):
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformation(transID)
        res = self.trans.getType()
        return DIRAC.S_OK({'Type': res['Value']})

    def getReplicas(self):
        """Return self.transReplicas, the replicas value stored by __init__ from prepareForPlugin()."""
        return self.transReplicas

    def getFiles(self):
        """Return self.transFiles, the files value stored by __init__ from prepareForPlugin()."""
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = [
            'CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW', 'SARA-RAW',
            'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW'
        ]
        counters = []
        for se in possibleTargets:
            counters.append(({'UsedSE': se}, 0))
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        if transID == self.transID and self.asIfProd:
            return self.transClient.getBookkeepingQuery(asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        """Stub: the arguments are not used and DIRAC.S_OK() is always returned."""
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        """
        Return the runs of a transformation.

        A request for self.transID is redirected to self.asIfProd when that
        is set (condDict is updated in place). Any ID other than
        self.transID is served by the real client. For self.transID the runs
        are those in condDict['RunNumber'] or, when none are given, the run
        numbers the bookkeeping reports for the files in self.transFiles.

        :param dict condDict: must contain 'TransformationID'; may contain
                              'RunNumber' (a single run or a collection)
        :return: result of the real client, or the failed bookkeeping
                 result, or S_OK([{'RunNumber': run, 'Status': 'Active',
                 'SelectedSite': None}, ...])
        """
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getTransformationRuns(condDict)

        runs = condDict.get('RunNumber', [])
        # A single run used to be iterated directly (TypeError for an int,
        # character by character for a string)
        if runs and not isinstance(runs, (list, tuple, set)):
            runs = [runs]
        if not runs and self.transFiles:
            res = self.bk.getFileMetadata(
                [fileDict['LFN'] for fileDict in self.transFiles])
            if not res['OK']:
                return res
            # values() rather than the Python-2-only itervalues()
            runs = list(
                set(meta['RunNumber']
                    for meta in res['Value']['Successful'].values()))
        transRuns = [{
            'RunNumber': run,
            'Status': "Active",
            "SelectedSite": None
        } for run in runs]
        return DIRAC.S_OK(transRuns)

    def getTransformationFiles(self, condDict=None):
        if condDict.get('TransformationID') == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict.get('TransformationID') == self.transID:
            transFiles = []
            if 'Status' in condDict and 'Unused' not in condDict['Status']:
                return DIRAC.S_OK(transFiles)
            runs = None
            if 'RunNumber' in condDict:
                runs = condDict['RunNumber']
                if not isinstance(runs, list):
                    runs = [runs]
            for fileDict in self.transFiles:
                if not runs or fileDict['RunNumber'] in runs:
                    transFiles.append({
                        'LFN': fileDict['LFN'],
                        'Status': 'Unused',
                        'RunNumber': fileDict['RunNumber']
                    })
            return DIRAC.S_OK(transFiles)
        else:
            return self.transClient.getTransformationFiles(condDict=condDict)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """
    Update the transFiles with some parameters
    """
        if transID == self.transID:
            for fileDict in self.transFiles:
                fileDict.update(lfnDict.get(fileDict['LFN'], {}))
            return S_OK()
        else:
            return self.transClient.setParameterToTransformationFiles(
                transID, lfnDict)

    def getTransformationFilesCount(self, transID, field, selection=None):
        if selection is None:
            selection = {}
        if transID == self.transID or selection.get(
                'TransformationID') == self.transID:
            runs = selection.get('RunNumber')
            if runs and not isinstance(runs, list):
                runs = [runs]
            if field == 'Status':
                counters = {'Unused': 0}
                for fileDict in self.transFiles:
                    if not runs or fileDict['RunNumber'] in runs:
                        counters['Unused'] += 1
            elif field == 'RunNumber':
                counters = {}
                for fileDict in self.transFiles:
                    runID = fileDict['RunNumber']
                    if not runs or runID in runs:
                        counters.setdefault(runID, 0)
                        counters[runID] += 1
            else:
                return DIRAC.S_ERROR('Not implemented for field ' + field)
            counters['Total'] = sum(count for count in counters.itervalues())
            return DIRAC.S_OK(counters)
        else:
            return self.transClient.getTransformationFilesCount(
                transID, field, selection=selection)

    def getTransformationRunStats(self, transIDs):
        counters = {}
        for transID in transIDs:
            if transID == self.transID:
                for fileDict in self.transFiles:
                    runID = fileDict['RunNumber']
                    counters[transID][runID]['Unused'] = counters.setdefault(
                        transID, {}).setdefault(runID, {}).setdefault(
                            'Unused', 0) + 1
                for runID in counters[transID]:
                    counters[transID][runID]['Total'] = counters[transID][
                        runID]['Unused']
            else:
                res = self.transClient.getTransformationRunStats(transIDs)
                if res['OK']:
                    counters.update(res['Value'])
                else:
                    return res
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        """Pass the call unchanged to self.transClient.addRunsMetadata() and return its result."""
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        """Pass the call unchanged to self.transClient.getRunsMetadata() and return its result."""
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        """Stub: the arguments are not used and DIRAC.S_OK() is always returned."""
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        """Stub: the arguments are not used and DIRAC.S_OK() is always returned."""
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        """Stub: the arguments are not used and DIRAC.S_OK() is always returned."""
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        """Stub: the arguments are not used and DIRAC.S_OK() is always returned."""
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        """Stub: the arguments are not used and DIRAC.S_OK() is always returned."""
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        """Pass the call unchanged to self.transClient.getDestinationForRun() and return its result."""
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        import time
        print "Preparing the plugin input data (%d files)" % len(lfns)
        type = self.trans.getType()['Value']
        if not lfns:
            return (None, None)
        res = self.bk.getFileMetadata(lfns)
        if res['OK']:
            files = []
            for lfn, metadata in res['Value']['Successful'].iteritems():
                runID = metadata.get('RunNumber', 0)
                runDict = {"RunNumber": runID, "LFN": lfn}
                files.append(runDict)
        else:
            print "Error getting BK metadata", res['Message']
            return ([], {})
        replicas = {}
        startTime = time.time()
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        for lfnChunk in breakListIntoChunks(lfns, 200):
            # print lfnChunk
            if type.lower() in ("replication", "removal"):
                res = self.dm.getReplicas(lfnChunk, getUrl=False)
            else:
                res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False)
            # print res
            if res['OK']:
                for lfn, ses in res['Value']['Successful'].iteritems():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                print "Error getting replicas of %d files:" % len(
                    lfns), res['Message']
        print "Obtained replicas of %d files in %.3f seconds" % (
            len(lfns), time.time() - startTime)
        return (files, replicas)