Python TransformationClient.getBookkeepingQuery примеры использования

Язык программирования: Python

Пространство имен/Пакет: LHCbDIRAC.TransformationSystem.Client.TransformationClient

Класс/Тип: TransformationClient

Метод/Функция: getBookkeepingQuery

Примеров на hotexamples.com: 5

Python TransformationClient.getBookkeepingQuery - 5 примеров найдено. Это лучшие примеры Python кода для LHCbDIRAC.TransformationSystem.Client.TransformationClient.TransformationClient.getBookkeepingQuery, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

getTransformation(6)

getBookkeepingQuery(5)

getTransformations(4)

getTransformationTasks(3)

getTransformationFiles(3)

TransformationClient(2)

getTransformationParameters(2)

extendTransformation(2)

getTransformationLogging(2)

getStoredJobDescription(1)

getTransformationsWithBkQueries(1)

getTransformationTaskStats(1)

getTransformationSummary(1)

getTransformationRuns(1)

getStoredJobDescriptionIDs(1)

getRunsInCache(1)

getRunsMetadata(1)

addBookkeepingQuery(1)

getFileSummary(1)

getDestinationForRun(1)

get(1)

deleteTransformationParameter(1)

deleteTransformationBookkeepingQuery(1)

addTransformationRunFiles(1)

addStoredJobDescription(1)

addRunsMetadata(1)

addFilesToTransformation(1)

addBookkeepingQueryRunList(1)

removeStoredJobDescription(1)

Пример #1

Показать файл

 def getBKProductions(self, visible=None):
     """
 It returns a list of productions
 """
     if visible is None:
         visible = self.isVisible()
     prodList = self.__bkQueryDict.get('Production')
     if prodList:
         if not isinstance(prodList, list):
             prodList = [prodList]
         return sorted(prodList)
     if not self.getProcessingPass():
         gLogger.fatal(
             'Impossible to get a list of productions without the Processing Pass'
         )
         return []
     eventTypes = self.__bkQueryDict.get('EventType')
     if not isinstance(eventTypes, list):
         eventTypes = [eventTypes]
     fullList = set()
     for eventType in eventTypes:
         bkQ = BKQuery(self.__bkQueryDict)
         bkQ.setVisible(visible)
         bkDict = bkQ.setEventType(eventType)
         # gLogger.notice( 'Get productions for BK query', str( bkDict ) )
         res = self.__bkClient.getProductions(bkDict)
         if not res['OK']:
             gLogger.error('Error getting productions from BK',
                           res['Message'])
             return []
         if self.getProcessingPass().replace('/', '') != 'Real Data':
             fileTypes = self.getFileTypeList()
             prodList = set(prod for prods in res['Value']['Records']
                            for prod in prods
                            if self.__getProdStatus(prod) != 'Deleted')
             # print '\n', self.__bkQueryDict, res['Value']['Records'], '\nVisible:', visible, prodList
             pList = set()
             if fileTypes:
                 transClient = TransformationClient()
                 for prod in prodList:
                     res = transClient.getBookkeepingQuery(prod)
                     if res['OK'] and res['Value']['FileType'] in fileTypes:
                         pList.add(prod)
             if not pList:
                 pList = prodList
         else:
             runList = sorted(
                 [-run for r in res['Value']['Records'] for run in r])
             startRun = int(self.__bkQueryDict.get('StartRun', 0))
             endRun = int(self.__bkQueryDict.get('EndRun', sys.maxsize))
             pList = set(run for run in runList
                         if run >= startRun and run <= endRun)
         fullList.update(pList)
     return sorted(fullList)

Пример #2

Показать файл

def execute():
    tr = TransformationClient()

    for switch in Script.getUnprocessedSwitches():
        pass

    bkQuery = dmScript.getBKQuery()
    if not bkQuery:
        gLogger.notice("No BKQuery given...")
        exit(1)

    startTime = time.time()
    prods = bkQuery.getBKProductions()  # visible = 'All' )

    parents = {}
    productions = {}
    for prod in prods:
        type = tr.getTransformation(prod).get('Value',
                                              {}).get('Type', 'Unknown')
        productions[prod] = type
        parent = tr.getBookkeepingQuery(prod).get('Value',
                                                  {}).get('ProductionID', '')
        if parent:
            type = tr.getTransformation(parent).get('Value',
                                                    {}).get('Type', 'Unknown')
            parents[parent] = type

    gLogger.notice("For BK path %s:" % bkQuery.getPath())
    if not prods:
        gLogger.notice('No productions found!')
    else:
        printProds('Productions found', productions)
        if parents:
            printProds('Parent productions', parents)

    gLogger.notice('Completed in %.1f seconds' % (time.time() - startTime))

Пример #3

Показать файл

Файл: BookkeepingWatchAgent.py Проект: antolu/LHCbDIRAC

class BookkeepingWatchAgent(AgentModule, TransformationAgentsUtilities):
  """ LHCbDIRAC only agent. A threaded agent.
  """

  def __init__(self, *args, **kwargs):
    """ c'tor
    """
    AgentModule.__init__(self, *args, **kwargs)
    TransformationAgentsUtilities.__init__(self)

    self.bkQueriesToBeChecked = Queue.Queue()
    self.bkQueriesInCheck = []

    self.fullUpdatePeriod = 86400
    self.bkUpdateLatency = 7200
    self.debug = False

    self.transInThread = {}

    self.pickleFile = 'BookkeepingWatchAgent.pkl'
    self.chunkSize = 1000

    self.pluginsWithNoRunInfo = ['LHCbStandard', 'ReplicateDataset', 'ArchiveDataset',
                                 'LHCbMCDSTBroadcastRandom', 'ReplicateToLocalSE',
                                 'RemoveReplicas', 'RemoveReplicasWhenProcessed',
                                 'RemoveReplicasWithAncestors', 'ReplicateWithAncestors',
                                 'ReduceReplicas', 'RemoveDatasetFromDisk',
                                 'DestroyDataset', 'DestroyDatasetWhenProcessed',
                                 'BySize', 'Standard']

    self.timeLog = {}
    self.fullTimeLog = {}
    self.bkQueries = {}

    self.transClient = None
    self.bkClient = None

  def initialize(self):
    """ Make the necessary initializations.
        The ThreadPool is created here, the _execute() method is what each thread will execute.
    """

    self.fullUpdatePeriod = self.am_getOption('FullUpdatePeriod', self.fullUpdatePeriod)
    self.bkUpdateLatency = self.am_getOption('BKUpdateLatency', self.bkUpdateLatency)
    self.debug = self.am_getOption('verbose', self.debug)

    self.pickleFile = os.path.join(self.am_getWorkDirectory(), self.pickleFile)
    self.chunkSize = self.am_getOption('maxFilesPerChunk', self.chunkSize)

    self.pluginsWithNoRunInfo = Operations().getValue('TransformationPlugins/PluginsWithNoRunInfo',
                                                      self.pluginsWithNoRunInfo)

    self._logInfo('Full Update Period: %d seconds' % self.fullUpdatePeriod)
    self._logInfo('BK update latency : %d seconds' % self.bkUpdateLatency)
    self._logInfo('Plugins with no run info: %s' % ', '.join(self.pluginsWithNoRunInfo))

    self.transClient = TransformationClient()
    self.bkClient = BookkeepingClient()

    try:
      with open(self.pickleFile, 'r') as pf:
        self.timeLog = pickle.load(pf)
        self.fullTimeLog = pickle.load(pf)
        self.bkQueries = pickle.load(pf)
      self._logInfo("successfully loaded Log from", self.pickleFile, "initialize")
    except (EOFError, IOError):
      self._logInfo("failed loading Log from", self.pickleFile, "initialize")
      self.timeLog = {}
      self.fullTimeLog = {}
      self.bkQueries = {}

    maxNumberOfThreads = self.am_getOption('maxThreadsInPool', 1)
    threadPool = ThreadPool(maxNumberOfThreads, maxNumberOfThreads)

    for i in xrange(maxNumberOfThreads):
      threadPool.generateJobAndQueueIt(self._execute, [i])

    gMonitor.registerActivity("Iteration", "Agent Loops", AGENT_NAME, "Loops/min", gMonitor.OP_SUM)
    return S_OK()

  @gSynchro
  def __dumpLog(self):
    """ dump the log in the pickle file
    """
    if self.pickleFile:
      try:
        with open(self.pickleFile, 'w') as pf:
          pickle.dump(self.timeLog, pf)
          pickle.dump(self.fullTimeLog, pf)
          pickle.dump(self.bkQueries, pf)
        self._logVerbose("successfully dumped Log into %s" % self.pickleFile)
      except IOError as e:
        self._logError("fail to open %s: %s" % (self.pickleFile, e))
      except pickle.PickleError as e:
        self._logError("fail to dump %s: %s" % (self.pickleFile, e))
      except ValueError as e:
        self._logError("fail to close %s: %s" % (self.pickleFile, e))

  ################################################################################

  def execute(self):
    """ Main execution method. Just fills a list, and a queue, with BKKQueries ID.
    """

    gMonitor.addMark('Iteration', 1)
    # Get all the transformations
    result = self.transClient.getTransformations(condDict={'Status': ['Active', 'Idle']})
    if not result['OK']:
      self._logError("Failed to get transformations.", result['Message'])
      return S_OK()
    transIDsList = [long(transDict['TransformationID']) for transDict in result['Value']]
    res = self.transClient.getTransformationsWithBkQueries(transIDsList)
    if not res['OK']:
      self._logError("Failed to get transformations with Bk Queries.", res['Message'])
      return S_OK()
    transIDsWithBkQueriesList = res['Value']

    _count = 0
    # Process each transformation
    for transID in transIDsWithBkQueriesList:
      if transID in self.bkQueriesInCheck:
        continue
      self.bkQueriesInCheck.append(transID)
      self.bkQueriesToBeChecked.put(transID)
      _count += 1

    self._logInfo("Out of %d transformations, %d put in thread queue" % (len(result['Value']), _count))

    self.__dumpLog()
    return S_OK()

  def _execute(self, threadID):
    """ Real executor. This is what is executed by the single threads - so do not return here! Just continue
    """

    while True:  # not self.bkQueriesToBeChecked.empty():

      transID = None

      try:

        transID = self.bkQueriesToBeChecked.get()
        self.transInThread[transID] = ' [Thread%d] [%s] ' % (threadID, str(transID))

        startTime = time.time()
        self._logInfo("Processing transformation %s." % transID, transID=transID)

        res = self.transClient.getTransformation(transID, extraParams=False)
        if not res['OK']:
          self._logError("Failed to get transformation", res['Message'], transID=transID)
          continue
        transPlugin = res['Value']['Plugin']

        res = self.transClient.getBookkeepingQuery(transID)
        if not res['OK']:
          self._logError("Failed to get BkQuery", res['Message'], transID=transID)
          continue
        bkQuery = res['Value']

        # Determine the correct time stamp to use for this transformation
        now = datetime.datetime.utcnow()
        self.__timeStampForTransformation(transID, bkQuery, now)

        try:
          files = self.__getFiles(transID, bkQuery, now)
        except RuntimeError as e:
          # In case we failed a full query, we should retry full query until successful
          if 'StartDate' not in bkQuery:
            self.bkQueries.pop(transID, None)
          self._logError("Failed to get response from the Bookkeeping: %s" % e, "", "__getFiles", transID)
          continue

        runDict = {}
        filesMetadata = {}
        # get the files metadata
        for lfnChunk in breakListIntoChunks(files, self.chunkSize):
          start = time.time()
          res = self.bkClient.getFileMetadata(lfnChunk)
          self._logVerbose("Got metadata from BK for %d files" % len(lfnChunk), transID=transID, reftime=start)
          if not res['OK']:
            self._logError("Failed to get BK metadata for %d files" % len(lfnChunk),
                           res['Message'], transID=transID)
            # No need to return as we only consider files that are successful...
          else:
            filesMetadata.update(res['Value']['Successful'])

        # There is no need to add the run information for a transformation that doesn't need it
        if transPlugin not in self.pluginsWithNoRunInfo:
          for lfn, metadata in filesMetadata.iteritems():
            runID = metadata.get('RunNumber', None)
            if isinstance(runID, (basestring, int, long)):
              runDict.setdefault(int(runID), []).append(lfn)
          try:
            self.__addRunsMetadata(transID, runDict.keys())
          except RuntimeError as e:
            self._logException("Failure adding runs metadata",
                               method="__addRunsMetadata",
                               lException=e,
                               transID=transID)
        else:
          runDict[None] = filesMetadata.keys()

        # Add all new files to the transformation
        for runID in sorted(runDict):
          lfnList = runDict[runID]
          # We enter all files of a run at once, otherwise do it by chunks
          lfnChunks = [lfnList] if runID else breakListIntoChunks(lfnList, self.chunkSize)
          for lfnChunk in lfnChunks:
            # Add the files to the transformation
            self._logVerbose('Adding %d lfns for transformation' % len(lfnChunk), transID=transID)
            result = self.transClient.addFilesToTransformation(transID, lfnChunk)
            if not result['OK']:
              self._logError("Failed to add %d lfns to transformation" % len(lfnChunk), result['Message'],
                             transID=transID)
              return result
            else:
              # Handle errors
              errors = {}
              for lfn, error in result['Value']['Failed'].iteritems():
                errors.setdefault(error, []).append(lfn)
              for error, lfns in errors.iteritems():
                self._logWarn("Failed to add files to transformation", error, transID=transID)
                self._logVerbose("\n\t".join([''] + lfns))
              # Add the metadata and RunNumber to the newly inserted files
              addedLfns = [lfn for (lfn, status) in result['Value']['Successful'].iteritems() if status == 'Added']
              if addedLfns:
                # Add files metadata: size and file type
                lfnDict = dict((lfn, {'Size': filesMetadata[lfn]['FileSize'],
                                      'FileType': filesMetadata[lfn]['FileType']})
                               for lfn in addedLfns)
                res = self.transClient.setParameterToTransformationFiles(transID, lfnDict)
                if not res['OK']:
                  self._logError("Failed to set transformation files metadata", res['Message'])
                  return res
                # Add run information if it exists
                if runID:
                  self._logInfo("Added %d files to transformation for run %d, now including run information"
                                % (len(addedLfns), runID), transID=transID)
                  self._logVerbose("Associating %d files to run %d" % (len(addedLfns), runID), transID=transID)
                  res = self.transClient.addTransformationRunFiles(transID, runID, addedLfns)
                  if not res['OK']:
                    self._logError("Failed to associate %d files to run %d" % (len(addedLfns), runID),
                                   res['Message'], transID=transID)
                    return res
                else:
                  self._logInfo("Added %d files to transformation" % len(addedLfns), transID=transID)

      except Exception as x:  # pylint: disable=broad-except
        self._logException('Exception while adding files to transformation',
                           lException=x,
                           method='_execute',
                           transID=transID)
      finally:
        self._logInfo("Processed transformation", transID=transID, reftime=startTime)
        if transID in self.bkQueriesInCheck:
          self.bkQueriesInCheck.remove(transID)
        self.transInThread.pop(transID, None)

    return S_OK()

  @gSynchro
  def __timeStampForTransformation(self, transID, bkQuery, now):
    """ Determine the correct time stamp to use for this transformation
    """

    fullTimeLog = self.fullTimeLog.setdefault(transID, now)
    bkQueryLog = self.bkQueries.setdefault(transID, {})

    bkQueryLog.pop('StartDate', None)
    self.bkQueries[transID] = bkQuery.copy()
    if transID in self.timeLog \
            and bkQueryLog == bkQuery \
            and (now - fullTimeLog) < datetime.timedelta(seconds=self.fullUpdatePeriod):
      # If it is more than a day since the last reduced query, make a full query just in case
      timeStamp = self.timeLog[transID]
      delta = datetime.timedelta(seconds=self.bkUpdateLatency)
      bkQuery['StartDate'] = (timeStamp - delta).strftime('%Y-%m-%d %H:%M:%S')
    if 'StartDate' not in bkQuery:
      self.fullTimeLog[transID] = now

  def __getFiles(self, transID, bkQuery, now):
    """ Perform the query to the Bookkeeping
    """
    self._logInfo("Using BK query for transformation: %s" % str(bkQuery), transID=transID)
    start = time.time()
    result = self.bkClient.getFiles(bkQuery)
    self._logVerbose("BK query time: %.2f seconds." % (time.time() - start), transID=transID)
    if not result['OK']:
      raise RuntimeError(result['Message'])
    else:
      self.__updateTimeStamp(transID, now)
      if result['Value']:
        self._logInfo("Obtained %d files from BK" % len(result['Value']), transID=transID)
      return result['Value']

  @gSynchro
  def __updateTimeStamp(self, transID, now):
    """
    Update time stamp for current transformation to now
    """
    self.timeLog[transID] = now

  def __addRunsMetadata(self, transID, runsList):
    """ Add the run metadata
    """
    runsInCache = self.transClient.getRunsInCache({'Name': ['TCK', 'CondDb', 'DDDB']})
    if not runsInCache['OK']:
      raise RuntimeError(runsInCache['Message'])
    newRuns = list(set(runsList) - set(runsInCache['Value']))
    if newRuns:
      self._logVerbose("Associating run metadata to %d runs" % len(newRuns), transID=transID)
      res = self.bkClient.getRunInformation({'RunNumber': newRuns, 'Fields': ['TCK', 'CondDb', 'DDDB']})
      if not res['OK']:
        raise RuntimeError(res['Message'])
      else:
        for run, runMeta in res['Value'].iteritems():
          res = self.transClient.addRunsMetadata(run, runMeta)
          if not res['OK']:
            raise RuntimeError(res['Message'])
    # Add run duration to the metadata
    runsInCache = self.transClient.getRunsInCache({'Name': ['Duration']})
    if not runsInCache['OK']:
      raise RuntimeError(runsInCache['Message'])
    newRuns = list(set(runsList) - set(runsInCache['Value']))
    if newRuns:
      self._logVerbose("Associating run duration to %d runs" % len(newRuns), transID=transID)
      res = self.bkClient.getRunInformation({'RunNumber': newRuns, 'Fields': ['JobStart', 'JobEnd']})
      if not res['OK']:
        raise RuntimeError(res['Message'])
      else:
        for run, runMeta in res['Value'].iteritems():
          duration = (runMeta['JobEnd'] - runMeta['JobStart']).seconds
          res = self.transClient.addRunsMetadata(run, {'Duration': duration})
          if not res['OK']:
            raise RuntimeError(res['Message'])

  def finalize(self):
    """ Gracious finalization
    """
    if self.bkQueriesInCheck:
      self._logInfo("Wait for queue to get empty before terminating the agent (%d tasks)" % len(self.transInThread))
      self.bkQueriesInCheck = []
      while self.transInThread:
        time.sleep(2)
      self.log.info("Threads are empty, terminating the agent...")
    return S_OK()

Пример #4

Показать файл

    elif opt == 'AddRuns':
        try:
            settings[opt] = [int(runID) for runID in val.split(',')]
        except TypeError:
            gLogger.error("Invalid run list", str(val))
            DIRAC.exit(1)
    elif opt == 'List':
        settings[opt] = True
if 'AddRuns' in settings and ('StartRun' in settings or 'EndRun' in settings):
    gLogger.error(
        'Incompatible requests, cannot set run list and start/end run')
    DIRAC.exit(1)

from LHCbDIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
client = TransformationClient()
res = client.getBookkeepingQuery(prodId)
if not res['OK']:
    gLogger.error("Error retrieving BKQuery for transformation %s" % prodId,
                  res['Message'])
    DIRAC.exit(2)
bkDict = res['Value']
startRun = bkDict.get('StartRun', 0)
endRun = bkDict.get('EndRun', 0)
runNumbers = bkDict.get('RunNumbers', 'All')

if ('StartRun' in settings
        or 'EndRun' in settings) and runNumbers and runNumbers != 'All':
    gLogger.notice("Transformation %d has RunNumbers key" % prodId)
    settings = {'List': True}

if 'AddRuns' in settings and (startRun or endRun):

Пример #5

Показать файл

class ProcessingProgress( object ):

  def __init__( self, cacheFile = None ):
    if not cacheFile:
      self.prodStatFile = os.path.join( os.environ['HOME'], ".dirac/work", "dirac-production-stats.pkl" )
    else:
      self.prodStatFile = cacheFile
    self.cacheVersion = '0.0'
    self.clearCache = []
    self.cachedInfo = {}

    # Recuperate the previous cached information
    self.readCache()

    self.bk = BookkeepingClient()
    self.transClient = TransformationClient()

  def setClearCache( self, clearCache ):
    self.clearCache = clearCache

  def __getProdBkDict( self, prod ):
    res = self.transClient.getBookkeepingQuery( prod )
    if not res['OK']:
      gLogger.error( "Couldn't get BK query on production %d" % prod )
      return {}
    prodBKDict = res['Value']
    return prodBKDict

  def getFullStats( self, bkQuery, printResult = False ):
    processingPass = bkQuery.getProcessingPass()
    if printResult:
      gLogger.info( "\nStatistics for processing %s, condition %s\n" % ( processingPass, bkQuery.getConditions() ) )
    prodStats = []
    processingPass = processingPass.split( '/' )
    if len( processingPass ) != 4 or processingPass[1] != "Real Data":
      gLogger.error( "Processing pass should be /Real Data/<Reco>/<Stripping>" )
      return []

    # Get production numbers for the Reco
    recoBKQuery = BKQuery( bkQuery )
    recoBKQuery.setProcessingPass( '/'.join( processingPass[0:3] ) )
    recoList = recoBKQuery.getBKProductions( visible = False )
    recoRunRanges = {}
    recoDQFlags = []
    for prod in recoList:
      prodBKDict = self.__getProdBkDict( prod )
      if prodBKDict:
        recoRunRanges[prod] = [prodBKDict.get( "StartRun", 0 ), prodBKDict.get( "EndRun", sys.maxint )]
        dqFlags = prodBKDict.get( "DataQualityFlag", ['UNCHECKED', 'EXPRESS_OK', 'OK'] )
        if isinstance( dqFlags, basestring ):
          dqFlags = dqFlags.split( ',' )
        recoDQFlags += [fl for fl in dqFlags if fl not in recoDQFlags]
      else:
        recoRunRanges[prod] = [0, 0]
    # Sort productions by runs
    try:
      recoList.sort( cmp = ( lambda p1, p2: int( recoRunRanges[p1][0] - recoRunRanges[p2][1] ) ) )
    except:
      print "Exception in sorting productions:"
      for p in recoList:
        print p, recoRunRanges[p]
    gLogger.verbose( "Reconstruction productions found (%d): %s" % ( len( recoList ), str( sorted( recoList ) ) ) )
    gLogger.verbose( "Reconstruction DQ flags: %s" % str( recoDQFlags ) )

    # Get productions for merging
    mergeList = []
    mergeStripProds = {}
    # Get stripping productions as parents of merging productions
    stripList = []
    for prod in bkQuery.getBKProductions( visible = False ):
      prodBKDict = self.__getProdBkDict( prod )
      gLogger.verbose( "BK query for production %s: %s" % ( prod, str( prodBKDict ) ) )
      mergedTypes = prodBKDict.get( 'FileType' )
      if type( mergedTypes ) != type( [] ):
        mergedTypes = [mergedTypes]
      if [ft for ft in bkQuery.getFileTypeList() if ft in mergedTypes] and 'ProductionID' in prodBKDict:
        mergeList.append( prod )
        prods = prodBKDict['ProductionID']
        if type( prods ) != type( [] ):
          prods = [prods]
        stripList += prods
        mergeStripProds[prod] = [str( p ) for p in prods]
      else:
        _msgTuple = ( str( bkQuery.getFileTypeList() ), prod, str( prodBKDict ) )
        gLogger.verbose( "Could not find production or filetype %s in BKquery of production %d (%s)" % _msgTuple )
    mergeList.sort( cmp = ( lambda p1, p2: int( mergeStripProds[p1][0] ) - int( mergeStripProds[p2][0] ) ) )
    gLogger.verbose( "Merging productions found: %s" % str( mergeList ) )

    # get list of stripping productions (from merging)
    stripRunRanges = {}
    for prod in stripList:
      prodBKDict = self.__getProdBkDict( prod )
      if prodBKDict:
        stripRunRanges[prod] = [prodBKDict.get( "StartRun", 0 ), prodBKDict.get( "EndRun", sys.maxint )]
      else:
        stripRunRanges[prod] = [0, 0]
    # Sort productions by runs
    try:
      stripList.sort( cmp = ( lambda p1, p2: int( stripRunRanges[p1][0] - stripRunRanges[p2][1] ) ) )
    except:
      print "Error when sorting stripping productions:"
      for prodStrip in stripList:
        print prodStrip, stripRunRanges[prodStrip]
    gLogger.verbose( "Stripping productions found (%d): %s" % ( len( stripList ), str( sorted( stripList ) ) ) )

    # Get all runs corresponding to the run range used by the Reco productions
    rawBKQuery = BKQuery( bkQuery )
    rawBKQuery.setProcessingPass( '/Real Data' )
    rawBKQuery.setFileType( "RAW" )
    # get the list of runs (-prodNum)
    fullRunList = rawBKQuery.getBKRuns()
    gLogger.verbose( "Initial list of runs: %s" % str( fullRunList ) )
    recoRunList = []
    openProd = False
    for prod in [p for p in recoList]:
      # Forget fully openProd productions
      # Don't consider productions without a BK query (these are individual files)
      if recoRunRanges[prod][1] == sys.maxint and recoRunRanges[prod][0] != -sys.maxint:
        openProd = True
        # Try and find if that open production overlaps with a closed one, in which case, remove it
        # Do nothing for derived productions
        for p in [p for p in recoList if p != prod and recoRunRanges[prod] != recoRunRanges[p]]:
          if recoRunRanges[prod][0] < recoRunRanges[p][1] and recoRunRanges[p][1] != sys.maxint:
            openProd = False
            gLogger.verbose( "Production %s was removed as redundant..." % str( prod ) )
            recoList.remove( prod )
            break
        if not openProd: continue
      recoRunList += [run for run in fullRunList if run not in recoRunList and run >= recoRunRanges[prod][0] and run <= recoRunRanges[prod][1]]
    gLogger.verbose( "List of runs matching Reco (%d): %s" % ( len( recoRunList ), str( sorted( recoRunList ) ) ) )

    restrictToStripping = True
    if restrictToStripping and not openProd and stripList:
      runList = []
      for prod in stripList:
        runList += [run for run in recoRunList if run not in runList and run >= stripRunRanges[prod][0] and run <= stripRunRanges[prod][1]]
    else:
      runList = recoRunList
    gLogger.verbose( "Final list of runs matching Reco and Stripping (%d): %s" % ( len( runList ), str( sorted( runList ) ) ) )

    # Now get statistics from the runs
    info, runInfo = self._getStatsFromRuns( int( bkQuery.getEventTypeList()[0] ), runList, recoDQFlags )
    rawInfo = StatInfo( processingPass[1], info )
    prodStats.append( rawInfo )
    if printResult:
      gLogger.info( "%s - All runs in Reco productions" % processingPass[1] )
      for fileInfo in runInfo:
        if runInfo[fileInfo]:
          gLogger.info( "%s runs (%d): %s" % ( fileInfo, len( runInfo[fileInfo] ), str( runInfo[fileInfo] ) ) )
      summStr = "%s files, " % rawInfo.getItemAsString( 'Files' )
      summStr += "%s events in " % rawInfo.getItemAsString( 'Events' )
      summStr += "%s runs, luminosity (pb-1):All=%s, Bad=%s, OK=%s" % ( rawInfo.getItemAsString( 'Runs' ),
                                                                        rawInfo.getItemAsString( 'Lumi' ),
                                                                        rawInfo.getItemAsString( 'BadLumi' ),
                                                                        rawInfo.getItemAsString( 'OKLumi' ) )
      gLogger.info( summStr )

    # Create the info for the 3 sets of productions
    prodSets = []
    fileType = bkQuery.getFileTypeList()[0]
    prodSets.append( {'Name': processingPass[2], 'FileType': ['SDST', 'FULL.DST'], 'List':recoList,
                      'RunRange':recoRunRanges, 'MotherProds':None, 'AllReplicas':False } )
    prodSets.append( {'Name': processingPass[3], 'FileType': fileType, 'List':stripList,
                      'RunRange':stripRunRanges, 'MotherProds':None, 'AllReplicas':True, 'StatForOK':False } )
    prodSets.append( {'Name': "Merging (%s)" % fileType.split( '.' )[0], 'FileType': fileType, 'List':mergeList,
                      'RunRange':None, 'MotherProds':mergeStripProds, 'AllReplicas':False } )

    prevInfo = rawInfo
    for prodSet in prodSets:
      info = StatInfo( prodSet['Name'], self._getProdInfo( prodSet, runList, printResult = printResult ) )
      info.setRawInfo( rawInfo )
      info.setPrevInfo( prevInfo )
      prevInfo = info
      prodStats.append( info )

    self.saveCache()
    return prodStats

  @staticmethod
  def __sumProdInfo( info, totInfo ):
    for inf in info:
      for flag in info[inf]:
        if inf == 'Runs':
          totInfo[inf][flag] = totInfo.setdefault( inf, {} ).setdefault( flag, [] ) + info[inf][flag]
        else:
          totInfo[inf][flag] = totInfo.setdefault( inf, {} ).setdefault( flag, 0 ) + info[inf][flag]
    return totInfo

  def _getProdInfo( self, prodSet, runList, printResult = False ):
    totInfo = {}
    if printResult:
      gLogger.info( "" )
    for prod in prodSet['List']:
      info, runInfo = self._getStatsFromBK( prod, prodSet['FileType'], runList, prodSet['AllReplicas'] )
      if info['Files'][''] == 0:
        continue
      if not prodSet.get( 'StatForOK', True ):
        for item in info:
          for fl in info[item]:
            if fl == 'OK':
              info[item][fl] = 0 if not item == 'Runs' else []
      runRange = prodSet['RunRange']
      if runRange and prod in runRange and runRange[prod][0] == 0 and runRange[prod][1] == 0:
        for flag in info['Runs']:
          info['Runs'][flag] = []
      totInfo = self.__sumProdInfo( info, totInfo )
      if printResult:
        summStr = "%s production %d -" % ( prodSet['Name'], prod )
        if runRange and prod in runRange:
          firstRun = runRange[prod][0]
          lastRun = runRange[prod][1]
          if firstRun:
            summStr += " From run %d" % int( firstRun )
          if lastRun and lastRun != sys.maxint:
            summStr += " Up to run %d" % int( lastRun )
          if firstRun == 0 and lastRun == 0:
            summStr += "No run range specified"
        motherProds = prodSet['MotherProds']
        if motherProds and prod in motherProds:
          summStr += " from productions %s" % motherProds[prod]
        gLogger.info( summStr )
        for inf in runInfo:
          if runInfo[inf]:
            gLogger.info( "%s runs (%d): %s" % ( inf, len( runInfo[inf] ), str( runInfo[inf] ) ) )
        summStr = "%d files, " % info['Files']['']
        if info['Events']:
          summStr += "%d events in " % info['Events']['']
        _msgTuple = ( len( info['Runs'][''] ), info['Lumi'][''], info['Lumi']['Bad'], info['Lumi']['OK'] )
        summStr += "%d runs, luminosity (pb-1): All=%.3f, Bad=%.3f, OK=%.3f" % _msgTuple
        gLogger.info( summStr )
    for flag in totInfo.get( 'Runs', [] ):
      totInfo['Runs'][flag] = len( totInfo['Runs'][flag] )
    return totInfo

  @staticmethod
  def outputResults( conditions, processingPass, prodStats ):
    outputString = ""
    _msgTuple = ( conditions, ",", processingPass, "on", time.ctime( time.time() ) )
    outputString += "\nProduction progress for %s %s %s %s %s\n" % _msgTuple
    if len( prodStats ) < 4:
      outputString += "No statistics found for this BK query"
      return outputString
    for i in xrange( 4 ):
      info = prodStats[i]
      if not info:
        continue
      name = info.getName()
      outputString += "\nSummary for %s\n" % name
      outputString += "%d files, " % info.getItem( 'Files' )
      if info.getItem( 'Events' ):
        outputString += "%d events in " % info.getItem( 'Events' )
      _msgTuple = ( info.getItem( 'Runs' ), info.getItem( 'Lumi' ), info.getItem( 'BadLumi' ), info.getItem( 'OKLumi' ) )
      outputString += "%d runs, luminosity (pb-1): All=%.3f, Bad=%.3f, OK=%.3f\n" % _msgTuple
      prevStats = prodStats[:i]
      prevStats.reverse()
      for prevInfo in prevStats:
        name = prevInfo.getName()
        if prevInfo.getItem( 'Runs' ) == 0:
          outputString += "From %s : - No runs...\n" % name
        else:
          outputString += "From %s : %.1f%% files, " % ( name, 100.*info.getItem( 'Files' ) / prevInfo.getItem( 'Files' ) )
          if info.getItem( 'Events' ) and prevInfo.getItem( 'Events' ):
            outputString += "%.1f%% events\n" % ( 100.*info.getItem( 'Events' ) / prevInfo.getItem( 'Events' ) )
          outputString += "%.1f%% runs, %.1f%% luminosity\n" \
                          % ( 100. * info.getItem( 'Runs' ) / prevInfo.getItem( 'Runs' ),
                              100. * info.getItem( 'Lumi' ) / prevInfo.getItem( 'Lumi' ) )
    return outputString

  def __getRunsDQFlag( self, runList, evtType ):
    res = self.bk.getRunFilesDataQuality( runList )
    runFlags = {}
    if res['OK']:
      dqFlags = res['Value']
      for dq in dqFlags:
        if dq[2] == evtType:
          runFlags.setdefault( dq[0], [] ).append( dq[1] )
    runDQFlags = {}
    flags = ( 'BAD', 'OK', 'EXPRESS_OK', 'UNCHECKED' )
    for run in runFlags:
      for fl in flags:
        if fl in runFlags[run]:
          runDQFlags[run] = fl
          break
    return runDQFlags

  def _getStatsFromRuns( self, evtType, runList, recoDQFlags ):
    info = dict.fromkeys( ( 'Events', 'Runs', 'Files', 'Lumi' ), {} )
    for inf in info:
      info[inf] = dict.fromkeys( ( 'Bad', 'OK', '' ), 0 )
    now = datetime.datetime.utcnow()
    # Set to True to renew the cache
    clearCache = 'RAW' in self.clearCache
    newRuns = [ run for run in runList if clearCache
                or run not in self.cachedInfo
                or 'DQFlag' not in self.cachedInfo[run]
                or ( now - self.cachedInfo[run]['Time'] ) < datetime.timedelta( days = 2 )   ]
    if newRuns:
      runFlags = self.__getRunsDQFlag( newRuns, evtType )
    else:
      runFlags = {}
    runsByDQFlag = {}
    runInfo = {}
    for run in runList:
      cached = self.cachedInfo.get( run, {} )
      cachedTime = cached.get( 'Time', None )
      if run not in newRuns:
        cachedFiles = cached.get( 'Files', 0 )
        cachedEvents = cached.get( 'EventStat', 0 )
        cachedLumi = cached.get( 'Luminosity', 0 )
        dqFlag = cached.get( 'DQFlag', None )
      else:
        res = self.bk.getRunInformations( run )
        if res['OK']:
          val = res['Value']
          ind = val['Stream'].index( 90000000 )
          cachedFiles = val['Number of file'][ind]
          cachedEvents = val['Number of events'][ind]
          cachedLumi = val['luminosity'][ind]
          cachedTime = val['RunStart']
        else:
          gLogger.error( "Unable to get run information for run %s" % str( run ) )
          continue
        dqFlag = runFlags[run]
      self.cachedInfo[run] = { 'Time':cachedTime, 'Files':cachedFiles, 'EventStat': cachedEvents,
                              'Luminosity': cachedLumi, 'DQFlag':dqFlag }
      runsByDQFlag[dqFlag] = runsByDQFlag.setdefault( dqFlag, 0 ) + 1
      if dqFlag == "BAD":
        runInfo.setdefault( 'BAD', [] ).append( run )
      elif dqFlag not in recoDQFlags and dqFlag != 'OK' :
        runInfo.setdefault( 'Untagged', [] ).append( run )
      # Now count...
      flags = []
      if dqFlag != 'BAD':
        flags.append( '' )
        # OK in recoDQFlags means we take everything that is not BAD (reprocessing or new convention)
        if dqFlag in recoDQFlags or dqFlag == 'OK':
          flags.append( 'OK' )
      else:
        flags.append( 'Bad' )
      for flag in flags:
        info['Runs'][flag] += 1
        info['Files'][flag] += cachedFiles
        info['Events'][flag] += cachedEvents
        info['Lumi'][flag] += cachedLumi

    # Set lumi in pb-1
    for flag in info['Lumi']:
      info['Lumi'][flag] /= 1000000.
    gLogger.info( "Runs per flag:" )
    for key in runsByDQFlag:
      gLogger.info( "%s : %d" % ( key, runsByDQFlag[key] ) )
    for flag in runInfo:
      runInfo[flag].sort()
    return info, runInfo

  def __getLfnsMetadata( self, lfns ):
    lfnDict = {}
    if len( lfns ):
      gLogger.verbose( "Getting metadata for %d files" % len( lfns ) )
      for lfnChunk in breakListIntoChunks( lfns, 1000 ):
        while True:
          res = self.bk.getFileMetadata( lfnChunk )
          if not res['OK']:
            gLogger.error( "Error getting files metadata, retrying...", res['Message'] )
          else:
            break
        metadata = res['Value']['Successful']
        for lfn in lfnChunk:
          lfnDict[lfn] = {}
          for meta in ( 'EventStat', 'Luminosity', 'DQFlag', 'RunNumber' ):
            lfnDict[lfn][meta] = metadata[lfn][meta]
    return lfnDict

  def _getStatsFromBK( self, prod, fileType, runList, allReplicas ):
    bkQueryDict = { "ProductionID": prod, "FileType": fileType }
    bkStr = str( bkQueryDict )
    bkQuery = BKQuery( bkQueryDict, visible = not allReplicas )
    if allReplicas:
      bkQuery.setOption( 'ReplicaFlag', "All" )
    cached = self.cachedInfo.get( bkStr, {} )
    cachedTime = cached.get( 'Time', None )
    cachedLfns = cached.get( 'Lfns', {} )
    if isinstance( fileType, basestring ):
      fileType = [fileType]
    if set( fileType ).intersection( set( self.clearCache ) ):
      cachedTime = datetime.datetime.utcnow() - datetime.timedelta( days = 8 )
      cachedTime = None
      cachedLfns = {}
      gLogger.verbose( "Cleared cache for production %s, file type %s" % ( str( prod ), fileType ) )
    # Update if needed the cached information on LFNs
    if cachedLfns:
      lfns = [lfn for lfn in cachedLfns if cachedLfns[lfn].get( 'DQFlag' ) not in ( 'OK', 'BAD' )]
      for lfnChunk in breakListIntoChunks( lfns, 1000 ):
        #  get the DQFlag of files that are not yet OK
        while True:
          res = self.bk.getFileMetadata( lfnChunk )
          if not res['OK']:
            gLogger.error( "Error getting files metadata for cached files, bkQuery %s: %s" % ( bkStr, res['Message'] ) )
          else:
            metadata = res['Value']['Successful']
            for lfn in lfnChunk:
              cachedLfns[lfn]['DQFlag'] = metadata[lfn]['DQFlag']
            break

    # Now get the new files since last time...
    if cachedTime:
      bkQuery.setOption( 'StartDate', cachedTime.strftime( '%Y-%m-%d %H:%M:%S' ) )
    gLogger.verbose( "Getting files for BKQuery %s" % str( bkQuery ) )
    cachedTime = datetime.datetime.utcnow()
    lfns = [lfn for lfn in bkQuery.getLFNs( printOutput = False ) if lfn not in cachedLfns]
    gLogger.verbose( "Returned %d files" % len( lfns ) )
    cachedLfns.update( self.__getLfnsMetadata( lfns ) )

    self.cachedInfo[bkStr] = { 'Time':cachedTime, 'Lfns':cachedLfns }

    # Now sum up all information for the files
    info = dict.fromkeys( ( 'Events', 'Runs', 'Files', 'Lumi' ), {} )
    for inf in info:
      if inf == 'Runs':
        for flag in ( 'Bad', 'OK', '' ):
          info[inf][flag] = []
      else:
        info[inf] = dict.fromkeys( ( 'Bad', 'OK', '' ), 0 )

    for lfn in cachedLfns:
      lfnInfo = cachedLfns[lfn]
      run = lfnInfo['RunNumber']
      if run in runList and run in self.cachedInfo and self.cachedInfo[run]['DQFlag'] != 'BAD':
        dqFlag = cachedLfns[lfn]['DQFlag']
        flags = []
        if dqFlag != 'BAD':
          flags.append( '' )
        else:
          flags.append( 'Bad' )
        if dqFlag == 'OK':
          flags.append( 'OK' )
        for flag in flags:
          if run not in info['Runs'][flag]:
            info['Runs'][flag].append( run )
          info['Files'][flag] += 1
          info['Events'][flag] += lfnInfo['EventStat']
          info['Lumi'][flag] += lfnInfo['Luminosity']

    runInfo = {}
    if 'BAD' in info['Runs']:
      runInfo['BAD'] = info['Runs']['BAD']
      runInfo['BAD'].sort()
    else:
      runInfo['BAD'] = []
    if '' in info['Runs'] and 'OK' in info['Runs']:
      runInfo['Untagged'] = [run for run in info['Runs'][''] if run not in info['Runs']['OK']]
      runInfo['Untagged'].sort()
    else:
      runInfo['Untagged'] = []
    # for f in info['Runs']:
    #  info['Runs'][f] = len( info['Runs'][f] )
    for flag in info['Lumi']:
      info['Lumi'][flag] /= 1000000.
    return info, runInfo

  def getPreviousStats( self, processingPass ):
    prevStats = self.cachedInfo.get( 'ProdStats', {} ).get( processingPass )
    if prevStats:
      try:
        _name = prevStats['ProdStats'][0][0].getName()
      except:
        prevStats = None
    return prevStats

  def setPreviousStats( self, processingPass, prevStats ):
    self.cachedInfo.setdefault( 'ProdStats', {} )[processingPass] = prevStats
    self.saveCache()

  def readCache( self ):
    if not os.path.exists( self.prodStatFile ):
      gLogger.info( "Created cached file %s" % self.prodStatFile )
      self.cachedInfo = {}
      self.saveCache()
      return
    fileRead = False
    while not fileRead:
      try:
        with FileLock( self.prodStatFile ):
          lFile = open( self.prodStatFile, 'r' )
          cachedVersion = pickle.load( lFile )
          startTime = time.time()
          if cachedVersion == self.cacheVersion:
            self.cachedInfo = pickle.load( lFile )
            _msgTuple = ( self.prodStatFile, time.time() - startTime )
            gLogger.info( "Loaded cached information from %s in %.3f seconds" % _msgTuple )
          else:
            _msgTuple = ( cachedVersion, self.cacheVersion )
            gLogger.info( "Incompatible versions of cache, reset information (%s, expect %s)" % _msgTuple )
            self.cachedInfo = {}
          lFile.close()
          fileRead = True
      except FileLockException, error:
        gLogger.error( "Lock exception: %s while reading pickle file %s" % ( error, self.prodStatFile ) )
      except: