示例#1
0
def getFileAncestors(inputData, ancestorDepth):
    """ Query the Bookkeeping for the ancestors of the given input files.

      Returns S_OK({inputFile1:[ancestor1,],}) or S_ERROR(<Message>).

      Input data can be an LFN string or a list of LFNs; any 'LFN:' marker is
      stripped before the query.  Ancestor depth is forwarded unchanged to the
      Bookkeeping client (an integer or a string that converts to an integer).

      An error is returned when the Bookkeeping call fails, when at least one
      file is reported under 'Failed', or when an input LFN is absent from
      the 'Successful' dictionary.
    """
    if not isinstance(inputData, list):
        inputData = [inputData]

    inputData = [lfn.replace('LFN:', '') for lfn in inputData]
    bk = BookkeepingClient()

    result = bk.getFileAncestors(inputData, depth=ancestorDepth, replica=True)
    gLogger.debug(result)
    if not result['OK']:
        gLogger.warn('Problem during getAncestors call:\n%s' %
                     (result['Message']))
        return result

    data = result['Value']
    if data['Failed']:
        return S_ERROR('No ancestors found for the following files:\n%s' %
                       ('\n'.join(data['Failed'])))

    # list.sort() sorts in place and returns None, so comparing the results of
    # two .sort() calls is always "None == None" and can never detect a
    # mismatch.  Compare the actual LFN sets instead.
    missing = set(inputData) - set(data['Successful'])
    if missing:
        gLogger.warn(
            'Not all ancestors returned after getAncestors call:\n%s' % result)
        return S_ERROR('Not all ancestors returned after getAncestors call')
    return S_OK(data['Successful'])
示例#2
0
class AncestorFilesAgent(OptimizerModule):
    """ Optimizer that extends the input data of a job with the ancestor
        files found in the Bookkeeping, for the AncestorDepth given in the JDL.
    """
    def __init__(self,
                 agentName,
                 loadName,
                 baseAgentName=False,
                 properties=None):
        """ c'tor

            properties defaults to None (instead of a shared {} literal) so
            that every instance hands its own dictionary to OptimizerModule.
        """
        if properties is None:
            properties = {}
        OptimizerModule.__init__(self, agentName, loadName, baseAgentName,
                                 properties)
        self.bk = BookkeepingClient()

    #############################################################################
    def checkJob(self, job, classadJob):
        """ The main agent execution method.

            Resolves the ancestors for the job and, when that succeeds,
            returns the result of setNextOptimizer(job); otherwise the error
            structure is returned unchanged.
        """
        result = self.__checkAncestorDepth(job, classadJob)
        if not result['OK']:
            return result

        return self.setNextOptimizer(job)

    #############################################################################
    def __checkAncestorDepth(self, job, classadJob):
        """ This method checks the input data with ancestors. The original job JDL
            is always extracted to obtain the input data, therefore rescheduled jobs
            will not recursively search for ancestors of ancestors etc.

            Returns S_ERROR when the JDL has no AncestorDepth attribute, S_OK
            without touching the job when the depth is 0, and otherwise the
            result of storing the extended input data and JDL.
        """
        inputData = []
        if classadJob.lookupAttribute('InputData'):
            inputData = classadJob.getListFromExpression('InputData')

        if not classadJob.lookupAttribute('AncestorDepth'):
            self.log.warn('No AncestorDepth requirement found for job %s' %
                          (job))
            return S_ERROR('AncestorDepth Not Found')

        ancestorDepth = classadJob.getAttributeInt('AncestorDepth')

        # Nothing to look up: leave the job as it is
        if ancestorDepth == 0:
            return S_OK('Null AncestorDepth specified')

        self.log.info(
            'Job %s has %s input data files and specified ancestor depth of %s'
            % (job, len(inputData), ancestorDepth))
        result = self.__getInputDataWithAncestors(job, inputData,
                                                  ancestorDepth)
        if not result['OK']:
            return result

        newInputData = result['Value']

        # The JDL is regenerated from the classad once it holds the new list
        classadJob.insertAttributeVectorString('InputData', newInputData)
        newJDL = classadJob.asJDL()
        return self.__setJobInputData(job, newJDL, newInputData)

    ############################################################################
    def __getInputDataWithAncestors(self, job, inputData, ancestorDepth):
        """ Extend the list of LFNs with the LFNs for their ancestor files
            for the generation depth specified in the job JDL.

            Returns S_OK(<ancestor LFNs followed by the input LFNs>) or
            S_ERROR when the Bookkeeping query raises or reports a failure.
            The outcome is also recorded as a job parameter.
        """
        inputData = [i.replace('LFN:', '') for i in inputData]
        start = time.time()
        try:
            result = self.bk.getFileAncestors(inputData,
                                              ancestorDepth,
                                              replica=True)
        except Exception as x:
            # Any client failure is turned into an S_ERROR for the caller
            self.log.warn('getFileAncestors failed with exception:\n%s' % x)
            return S_ERROR('getFileAncestors failed with exception')

        self.log.info('BK lookup time %.2f s' % (time.time() - start))
        self.log.debug(result)
        if not result['OK']:
            report = self.setJobParam(job,
                                      self.am_getModuleParam('optimizerName'),
                                      result['Message'])
            if not report['OK']:
                self.log.warn(report['Message'])
            self.log.warn(result['Message'])
            return S_ERROR('No Ancestors Found For Input Data')

        # Flatten {lfn: [ancestorDict, ...]} into a list of ancestor LFNs
        ancestors = [
            anc['FileName']
            for ancList in result['Value']['Successful'].values()
            for anc in ancList
        ]
        newInputData = ancestors + inputData
        param = '%d ancestor files retrieved from BK for depth %s' % (
            len(ancestors), ancestorDepth)

        report = self.setJobParam(job, self.am_getModuleParam('optimizerName'),
                                  param)
        if not report['OK']:
            self.log.warn(report['Message'])

        return S_OK(newInputData)

    def __setJobInputData(self, job, jdl, inputData):
        """ Sets the new job input data requirement including ancestor files.

            Stores the LFNs (without any 'LFN:' marker) and then the JDL in
            the job DB; returns S_ERROR as soon as one of the two updates fails.
        """
        inputData = [i.replace('LFN:', '') for i in inputData]

        result = self.jobDB.setInputData(job, inputData)
        if not result['OK']:
            self.log.error(result['Message'])
            return S_ERROR('Setting New Input Data')

        result = self.jobDB.setJobJDL(job, jdl)
        if not result['OK']:
            self.log.error(result['Message'])
            return S_ERROR('Setting New JDL')

        return S_OK('Job updated')
示例#3
0
class DiracLHCb(Dirac):

  #############################################################################
  def __init__(self, withRepo=False, repoLocation='', operationsHelperIn=None):
    """ Set up the LHCb flavour of the DIRAC API.

        The base Dirac API is initialized first; afterwards the Operations
        helper, the default Bookkeeping query template and a Bookkeeping
        client are attached to the instance.

       @param withRepo: forwarded unchanged to the Dirac base class
       @param repoLocation: forwarded unchanged to the Dirac base class
       @param operationsHelperIn: optional helper used in place of a new Operations() object
    """
    super(DiracLHCb, self).__init__(withRepo=withRepo, repoLocation=repoLocation)

    self.tier1s = []
    self.opsH = operationsHelperIn if operationsHelperIn else Operations()

    # Default value for every field that bkQuery() accepts
    self._bkQueryTemplate = dict(SimulationConditions='All',
                                 DataTakingConditions='All',
                                 ProcessingPass='******',
                                 FileType='All',
                                 EventType='All',
                                 ConfigName='All',
                                 ConfigVersion='All',
                                 Production=0,
                                 StartRun=0,
                                 EndRun=0,
                                 DataQuality='All',
                                 Visible='Yes')

    # Kept on the instance so that all BK client methods are reachable indirectly
    self._bkClient = BookkeepingClient()

  #############################################################################
  def addRootFile(self, lfn, fullPath, diracSE, printOutput=False):
    """ Upload a Root file to Grid storage through the base class addFile(),
        passing along the GUID that makeGuid() reports for the local file.

       Example Usage:

       >>> print dirac.addRootFile('/lhcb/user/p/paterson/myRootFile.tar.gz','myFile.tar.gz','CERN-USER')
       {'OK': True, 'Value':{'Failed': {},
        'Successful': {'/lhcb/user/p/paterson/test/myRootFile.tar.gz': {'put': 64.246301889419556,
                                                                    'register': 1.1102778911590576}}}}

       @param lfn: Logical File Name (LFN)
       @type lfn: string
       @param fullPath: path of the local file to upload
       @type fullPath: string
       @param diracSE: DIRAC SE name e.g. CERN-USER
       @type diracSE: string
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    # makeGuid() answers with a mapping keyed by the path it was given
    guidsByPath = makeGuid(fullPath)
    baseApi = super(DiracLHCb, self)
    return baseApi.addFile(lfn, fullPath, diracSE,
                           fileGuid=guidsByPath[fullPath],
                           printOutput=printOutput)

  def addFile(self, lfn, fullPath, diracSE, printOutput=False):  # pylint: disable=arguments-differ
    """ Same behaviour as addRootFile: upload through the base class addFile()
        with the GUID that makeGuid() reports for fullPath.
    """
    # makeGuid() answers with a mapping keyed by the path it was given
    guidsByPath = makeGuid(fullPath)
    baseApi = super(DiracLHCb, self)
    return baseApi.addFile(lfn, fullPath, diracSE,
                           fileGuid=guidsByPath[fullPath],
                           printOutput=printOutput)

  def getBKAncestors(self, lfns, depth=1, replica=True):
    """ This function allows to retrieve ancestor files from the Bookkeeping.

        Example Usage:

        >>> dirac.getBKAncestors('/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst',2)
        {'OK': True, 'Value': ['/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst',
        '/lhcb/data/2009/RAW/FULL/LHCb/COLLISION09/63807/063807_0000000004.raw']}

       @param lfns: Logical File Name(s) (LFN)
       @type lfns: string or list
       @param depth: Ancestor depth
       @type depth: integer
       @param replica: forwarded to the Bookkeeping client as the 'replica' flag
       @type replica: boolean
       @return: S_OK(<input LFNs followed by the distinct ancestor LFNs>), S_ERROR
    """
    # A single LFN is documented as valid input, but "string + list" below
    # would raise TypeError, so always work on a list.
    if isinstance(lfns, (list, tuple, set)):
      lfns = list(lfns)
    else:
      lfns = [lfns]

    result = self._bkClient.getFileAncestors(lfns, depth, replica=replica)
    if not result['OK']:
      self.log.error('Could not get ancestors', result['Message'])
      return result

    # An ancestor shared by several input files is reported only once
    ancestors = set(anc['FileName']
                    for ancList in result['Value']['Successful'].values()
                    for anc in ancList)

    return S_OK(lfns + list(ancestors))

  #############################################################################
  def bkQueryRunsByDate(self, bkPath, startDate, endDate, dqFlag='All', selection='Runs'):
    """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<ConfigurationName>/<Configuration Version>/<Condition Description>/<Processing Pass>/<Event Type>/<File Type>

        so an example for 2016 collisions data would be:

        /LHCb/Collision16/Beam6500GeV-VeloClosed-MagDown/Real Data/Reco16/Stripping26/90000000/EW.DST

        The startDate and endDate must be specified as yyyy-mm-dd.

        Runs can be selected based on their status e.g. the selection parameter
        has the following possible attributes:
         - Runs - data for all runs in the range are queried (default)
         - ProcessedRuns - data is retrieved for runs that are processed
         - NotProcessed - data is retrieved for runs that are not yet processed.

       Example Usage:

       >>> dirac.bkQueryRunsByDate('/LHCb/Collision16//Real Data/90000000/RAW',
                                   '2016-08-20','2016-08-22',dqFlag='OK',selection='Runs')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

      dirac.bkQueryRunsByDate('/LHCb/Collision16/Beam6500GeV-VeloClosed-MagDown/Real'
                              'Data/Reco16/Stripping26/90000000/EW.DST',
                              '2016-08-20','2016-08-22',dqFlag='OK',selection='Runs')

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @param startDate: Start date  yyyy-mm-dd
       @type startDate: string
       @param endDate: End date  yyyy-mm-dd
       @type endDate: string
       @param selection: Either Runs, ProcessedRuns or NotProcessed
       @type selection: string
       @return: S_OK,S_ERROR
    """
    runSelection = ['Runs', 'ProcessedRuns', 'NotProcessed']
    if selection not in runSelection:
      return S_ERROR('Expected one of %s not "%s" for selection' % (', '.join(runSelection), selection))

    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # The path is turned into a query dictionary by the BKQuery helper
    bkQuery = BKQuery().buildBKQuery(bkPath)
    if not bkQuery:
      return S_ERROR(
          'Please provide a BK path: '
          '/<ConfigurationName>/<Configuration Version>/<Condition Description>/<Processing Pass>'
          '/<Event Type>/<File Type>')

    if not startDate or not endDate:
      return S_ERROR('Expected both start and end dates to be defined in format: yyyy-mm-dd')

    if not isinstance(startDate, str) or not isinstance(endDate, str):
      return S_ERROR('Expected yyyy-mm-dd string for start and end dates')

    if len(startDate.split('-')) != 3 or len(endDate.split('-')) != 3:
      return S_ERROR('Expected yyyy-mm-dd string for start and end dates')

    start = time.time()
    result = self._bkClient.getRunsForAGivenPeriod({'StartDate': startDate, 'EndDate': endDate})
    rtime = time.time() - start
    self.log.info('BK query time: %.2f sec' % rtime)
    if not result['OK']:
      self.log.info('Could not get runs with given dates from BK with result: "%s"' % result)
      return result

    if not result['Value']:
      self.log.info('No runs selected from BK for specified dates')
      return result

    if selection not in result['Value']:
      return S_ERROR('No %s runs for specified dates' % (selection))

    runs = result['Value'][selection]
    self.log.info('Found the following %s runs:\n%s' % (len(runs), ', '.join([str(i) for i in runs])))

    # The DQ flag does not depend on the run: validate it once instead of
    # asking the BK for the allowed flags on every iteration.  It is only
    # validated when there is at least one run, as before.
    if dqFlag and runs:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      dqFlag = check['Value']

    checkRunStatus = selection in ('ProcessedRuns', 'NotProcessed')

    # temporary until we can query for a discrete list of runs
    selectedData = []
    for run in runs:
      query = bkQuery.copy()
      query['StartRun'] = run
      query['EndRun'] = run
      query['CheckRunStatus'] = checkRunStatus
      if dqFlag:
        query['DataQuality'] = dqFlag
      start = time.time()
      result = self._bkClient.getVisibleFilesWithMetadata(query)
      rtime = time.time() - start
      self.log.info('BK query time: %.2f sec' % rtime)
      self.log.verbose(result)
      if not result['OK']:
        return result
      # The files are held under the 'LFNs' key; count those rather than the
      # number of keys of the reply, and tolerate a reply without that key.
      lfnsFound = result['Value'].get('LFNs') or {}
      self.log.info('Selected %s files for run %s' % (len(lfnsFound), run))
      selectedData.extend(lfnsFound)

    self.log.info('Total files selected = %s' % (len(selectedData)))
    return S_OK(selectedData)

  #############################################################################
  def bkQueryRun(self, bkPath, dqFlag='All'):
    """ Build and run a BK query for one run, or a range of runs, from a BK
        path that follows the convention:

        /<Run Number>/<Processing Pass>/<Event Type>/<File Type>

        e.g. for 2009 collisions data:

       /63566/Real Data + RecoToDST-07/90000000/DST

       A range of runs is given as:

       /<Run Number 1> - <Run Number 2>/<Processing Pass>/<Event Type>/<File Type>

       e.g.:

       /63566-63600/Real Data + RecoToDST-07/90000000/DST

       The two bounds of a range may be given in either order.

       Example Usage:

       >>> dirac.bkQueryRun('/63566/Real Data/RecoToDST-07/90000000/DST')
       {'OK':True,'Value': ['/lhcb/data/2009/DST/00005842/0000/00005842_00000008_1.dst']}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # translateBKPath splits the path into its components
    pathParts = translateBKPath(bkPath, procPassID=1)
    if len(pathParts) != 4:
      return S_ERROR('Expected 4 components to the BK path: /<Run Number>/<Processing Pass>/<Event Type>/<File Type>')

    # Normalize "<run1> - <run2>" / "<run1>--<run2>" to "<run1>-<run2>"
    runNumberString = pathParts[0].replace('--', '-').replace(' ', '')
    if '-' not in runNumberString:
      try:
        firstRun = int(runNumberString)
      except Exception:
        return S_ERROR('Invalid run number: %s' % runNumberString)
      lastRun = firstRun
    else:
      bounds = runNumberString.split('-')
      if len(bounds) != 2:
        return S_ERROR('Could not determine run range from "%s", try "<Run 1> - <Run2>"' % (runNumberString))
      try:
        low = int(bounds[0])
        high = int(bounds[1])
      except Exception:
        return S_ERROR('Invalid run range: %s' % runNumberString)
      firstRun, lastRun = min(low, high), max(low, high)

    query = self._bkQueryTemplate.copy()
    query.update({'StartRun': firstRun,
                  'EndRun': lastRun,
                  'ProcessingPass': pathParts[1],
                  'EventType': pathParts[2],
                  'FileType': pathParts[3]})

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      query['DataQuality'] = check['Value']

    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bkQueryProduction(self, bkPath, dqFlag='All'):
    """ Build and run a BK query for one production from a BK path that
        follows the convention:

        /<ProductionID>/[<Processing Pass>/<Event Type>/]<File Type>

        e.g. for 2009 collisions data:

       /5842/Real Data + RecoToDST-07/90000000/DST

       Only the first component (production ID) and the last one (file type)
       are used for the query, so the processing pass and the event type can
       be omitted.

       a data quality flag can also optionally be provided, the full list of these is available
       via the getAllDQFlags() method.

       Example Usage:

       >>> dirac.bkQueryProduction('/5842/Real Data/RecoToDST-07/90000000/DST')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # translateBKPath splits the path into its components
    pathParts = translateBKPath(bkPath, procPassID=1)
    if len(pathParts) < 2:
      return S_ERROR('Invalid bkPath: should at least contain /ProductionID/FileType')

    query = self._bkQueryTemplate.copy()
    try:
      query['Production'] = int(pathParts[0])
    except Exception:
      return S_ERROR('Invalid production ID')
    query['FileType'] = pathParts[-1]

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      query['DataQuality'] = check['Value']

    # Keep only the fields that carry a real constraint: any string equal to
    # "all" (whatever the case) is dropped from the query.
    query = dict((field, setting) for field, setting in query.items()
                 if not (isinstance(setting, basestring) and setting.lower() == 'all'))

    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bkQueryPath(self, bkPath, dqFlag='All'):
    """ Build and run a BK query from a full BK path that follows the
        convention:

       /<ConfigurationName>/<Configuration Version>/<Sim or Data Taking Condition>
       /<Processing Pass>/<Event Type>/<File Type>

       e.g. for 2009 collisions data:

       /LHCb/Collision09/Beam450GeV-VeloOpen-MagDown/Real Data + RecoToDST-07/90000000/DST

       or for MC09 simulated data:

       /MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/2010-Sim01Reco01-withTruth/27163001/DST

       a data quality flag can also optionally be provided, the full list of these is available
       via the getAllDQFlags() method.

       Example Usage:

       >>> dirac.bkQueryPath('/MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/Sim07/Reco06-withTruth/10012004/DST')
       {'OK': True, 'Value': [<LFN1>,<LFN2>]}

       @param bkPath: BK path as described above
       @type bkPath: string
       @param dqFlag: Optional Data Quality flag
       @type dqFlag: string
       @return: S_OK,S_ERROR
    """
    if not isinstance(bkPath, str):
      return S_ERROR('Expected string for bkPath')

    # translateBKPath splits the path into its components
    components = translateBKPath(bkPath, procPassID=3)
    if len(components) != 6:
      return S_ERROR('Expected 6 components to the BK path: '
                     '/<ConfigurationName>/<Configuration Version>/<Sim or Data Taking Condition>'
                     '/<Processing Pass>/<Event Type>/<File Type>')

    query = self._bkQueryTemplate.copy()
    query.update({'ConfigName': components[0],
                  'ConfigVersion': components[1],
                  'ProcessingPass': components[3],
                  'EventType': components[4],
                  'FileType': components[5]})

    if dqFlag:
      check = self.__checkDQFlags(dqFlag)
      if not check['OK']:
        return check
      query['DataQuality'] = check['Value']

    # The path does not say whether the third component is a simulation or a
    # data taking condition: a configuration name of "MC" is taken to mean simulation.
    if components[0].lower() == 'mc':
      conditionField = 'SimulationConditions'
    else:
      conditionField = 'DataTakingConditions'
    query[conditionField] = components[2]

    result = self.bkQuery(query)
    self.log.verbose(result)
    return result

  #############################################################################
  def bookkeepingQuery(self, SimulationConditions='All', DataTakingConditions='All',
                       ProcessingPass='******', FileType='All', EventType='All', ConfigName='All',
                       ConfigVersion='All', ProductionID=0, DataQuality='ALL'):
    """ Fill a copy of the BK query template with the supplied arguments and
        hand it to bkQuery().

        Example Usage:

        >>> dirac.bookkeepingQuery(ConfigName='LHCb',ConfigVersion='Collision09',
        EventType='90000000',ProcessingPass='******',DataTakingConditions='Beam450GeV-VeloOpen-MagDown')
        {'OK':True,'Value':<files>}

       @param  SimulationConditions: BK SimulationConditions
       @type SimulationConditions: string
       @param  DataTakingConditions: BK DataTakingConditions
       @type DataTakingConditions: string
       @param  ProcessingPass: BK ProcessingPass
       @type ProcessingPass: string
       @param  FileType: BK FileType
       @type FileType: string
       @param  EventType: BK EventType
       @type EventType: string
       @param  ConfigName: BK ConfigName
       @type ConfigName: string
       @param  ConfigVersion: BK ConfigVersion
       @type ConfigVersion: string
       @param  ProductionID: BK ProductionID (stored under the 'Production' query field)
       @type ProductionID: integer
       @param  DataQuality: BK DataQuality
       @type DataQuality: string
       @return: S_OK,S_ERROR
    """
    query = self._bkQueryTemplate.copy()
    query.update({'SimulationConditions': SimulationConditions,
                  'DataTakingConditions': DataTakingConditions,
                  'ProcessingPass': ProcessingPass,
                  'FileType': FileType,
                  'EventType': EventType,
                  'ConfigName': ConfigName,
                  'ConfigVersion': ConfigVersion,
                  'Production': ProductionID,
                  'DataQuality': DataQuality})
    return self.bkQuery(query)

  #############################################################################
  def bkQuery(self, bkQueryDict):
    """ Developer function. Send a BK query dictionary to the LHCb Bookkeeping
        and return its reply.

        The dictionary is cleaned up in place before it is sent: unset numeric
        fields (value 0) and fields set to "all" are removed, remaining
        numeric fields are converted to strings and a FileType of "all" is
        replaced by 'ALL'.

        Example Usage:

        >>> print dirac.bkQuery(query)
        {'OK':True,'Value':<files>}

       @param bkQueryDict: BK query
       @type bkQueryDict: dictionary (see bookkeepingQuery() for keys)
       @return: S_OK,S_ERROR
    """
    # Only the fields of the query template are accepted.  The Visible flag is
    # left alone: the BK method used below is for visible files anyway.
    unknownFields = [field for field in bkQueryDict if field not in self._bkQueryTemplate]
    if unknownFields:
      msg = 'The following fields are not valid for a BK query: %s\nValid fields include: %s' % \
            (', '.join(unknownFields), ', '.join(self._bkQueryTemplate.keys()))
      return S_ERROR(msg)

    # Work on a snapshot of the items since entries are removed on the way
    numericFields = ("Production", "EventType", "StartRun", "EndRun")
    for field, setting in list(bkQueryDict.items()):
      if field in numericFields:
        if setting == 0:
          del bkQueryDict[field]
        else:
          bkQueryDict[field] = str(setting)
      elif field == "FileType":
        if setting.lower() == "all":
          bkQueryDict[field] = 'ALL'
      elif str(setting).lower() == "all":
        del bkQueryDict[field]

    hasSpecificField = any(field in bkQueryDict for field in ('Production', 'StartRun', 'EndRun'))
    if hasSpecificField:
      self.log.verbose('Found a specific query so loosening some restrictions to prevent BK overloading')
    else:
      if not ('SimulationConditions' in bkQueryDict or 'DataTakingConditions' in bkQueryDict):
        return S_ERROR('A Simulation or DataTaking Condition must be specified for a BK query.')
      # NOTE(review): this only fails when all three fields are missing, while
      # the message reads as if each of them were required - confirm the intent.
      if not ('EventType' in bkQueryDict or 'ConfigName' in bkQueryDict or 'ConfigVersion' in bkQueryDict):
        return S_ERROR(
            'The minimal set of BK fields for a query is: EventType, ConfigName and ConfigVersion'
            ' in addition to a Simulation or DataTaking Condition')

    self.log.verbose('Final BK query dictionary is:')
    for field, setting in bkQueryDict.items():
      self.log.verbose('%s : %s' % (field, setting))

    queryStart = time.time()
    result = self._bkClient.getVisibleFilesWithMetadata(bkQueryDict)
    elapsed = time.time() - queryStart
    self.log.info('BK query time: %.2f sec' % elapsed)

    if not result['OK']:
      return S_ERROR('BK query returned an error: "%s"' % (result['Message']))

    if not result['Value']:
      return self._errorReport('No BK files selected')

    # NOTE(review): this is the length of the reply value itself; whether that
    # equals the number of files depends on the reply layout - confirm.
    self.log.verbose('%s files selected from the BK' % (len(result['Value'])))
    return result

  #############################################################################
  def __checkDQFlags(self, flags):
    """ Internal function. Validate one DQ flag, or a list of them, against
        the flags returned by getAllDQFlags().

        Flags are upper-cased; "all" (in any case) is accepted without being
        looked up. S_OK holds the single flag when exactly one was validated,
        otherwise the list of flags. The first unknown flag yields S_ERROR.
    """
    requested = flags if isinstance(flags, list) else [flags]

    allowed = self.getAllDQFlags()
    if not allowed['OK']:
      return allowed

    validated = []
    for flag in requested:
      isWildcard = flag.lower() == 'all'
      flag = flag.upper()
      if not isWildcard and flag not in allowed['Value']:
        msg = 'Specified DQ flag "%s" is not in allowed list: %s' % (flag, ', '.join(allowed['Value']))
        self.log.error(msg)
        return S_ERROR(msg)
      validated.append(flag)

    # when first coding this it was not possible to use a list ;)
    if len(validated) == 1:
      return S_OK(validated[0])
    return S_OK(validated)

  #############################################################################
  def getAllDQFlags(self, printOutput=False):
    """ Helper function. Ask the Bookkeeping client for the available data
        quality flags and return its reply unchanged.

        Example Usage:

        >>> print dirac.getAllDQFlags()
        {'OK':True,'Value':<flags>}

       @param printOutput: Optional flag to log the flags at info level
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    result = self._bkClient.getAvailableDataQuality()
    if result['OK']:
      if printOutput:
        self.log.info('Possible DQ flags from BK are: %s' % (', '.join(result['Value'])))
    else:
      self.log.error('Could not obtain possible DQ flags from BK with result:\n%s' % (result))
    return result

  #############################################################################
  def getDataByRun(self, lfns, printOutput=False):
    """Sort the supplied lfn list by run. An S_OK object will be returned
       containing a dictionary of runs and the corresponding list of LFN(s)
       associated with them.

       Files for which the Bookkeeping metadata holds no 'RunNumber' are
       left out of the result and reported with a warning.

       Example usage:

       >>> print dirac.getDataByRun(lfns)
       {'OK': True, 'Value': {<RUN>:['<LFN>','<LFN>',...], <RUN>:['<LFN>',..]}}


       @param lfns: Logical File Name(s)
       @type lfns: string or list
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except (AttributeError, TypeError, ValueError) as x:
        # A non-string element has no usable replace(): that surfaces as
        # AttributeError/TypeError, which "except ValueError" alone let through.
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    runDict = {}
    start = time.time()
    result = self._bkClient.getFileMetadata(lfns)
    self.log.verbose("Obtained BK file metadata in %.2f seconds" % (time.time() - start))
    if not result['OK']:
      self.log.error('Failed to get bookkeeping metadata with result "%s"' % (result['Message']))
      return result

    for lfn, metadata in result['Value']['Successful'].items():
      if 'RunNumber' in metadata:
        runDict.setdefault(metadata['RunNumber'], []).append(lfn)
      else:
        self.log.warn('Could not find run number from BK for %s' % (lfn))

    if printOutput:
      self.log.notice(self.pPrint.pformat(runDict))

    return S_OK(runDict)

  #############################################################################
  def bkMetadata(self, lfns, printOutput=False):
    """Return metadata for the supplied lfn list. An S_OK object will be returned
       containing a dictionary of LFN(s) and the corresponding metadata associated
       with them.

       The reply of the Bookkeeping client is returned unchanged.

       Example usage:

       >>> print dirac.bkMetadata(lfns)
       {'OK': True, 'Value': {<LFN>:{'<Name>':'<Value>',...},...}}

       @param lfns: Logical File Name(s)
       @type lfns: string or list
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except (AttributeError, TypeError, ValueError) as x:
        # A non-string element has no usable replace(): that surfaces as
        # AttributeError/TypeError, which "except ValueError" alone let through.
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    start = time.time()
    result = self._bkClient.getFileMetadata(lfns)
    self.log.verbose("Obtained BK file metadata in %.2f seconds" % (time.time() - start))
    if not result['OK']:
      self.log.error('Failed to get bookkeeping metadata with result "%s"' % (result['Message']))
      return result

    if printOutput:
      self.log.notice(self.pPrint.pformat(result['Value']))

    return result

  #############################################################################

  def lhcbProxyInit(self, *args):  # pylint: disable=no-self-use
    """ Run the dirac-proxy-init script with LogLevel=NOTICE, -t and --rfc,
        followed by the supplied arguments.

        Each element of args reaches the script as one separate argument.
        The former shell string joined the arguments with "' '", which the
        shell reads as a single word, and left them open to shell
        interpretation.

       @param args: extra command line arguments for dirac-proxy-init
       @type args: strings
       @return: None
    """
    # Imported here because this block cannot see the module-level imports
    import subprocess

    command = ['dirac-proxy-init', '-o', 'LogLevel=NOTICE', '-t', '--rfc']
    command.extend(args)
    try:
      subprocess.call(command)
    except OSError as x:
      # os.system() never raised when the script could not be started
      self.log.error('Could not execute dirac-proxy-init', str(x))

  #############################################################################

  def lhcbProxyInfo(self, *args):  # pylint: disable=no-self-use
    """ Run the dirac-proxy-info script with LogLevel=NOTICE, followed by the
        supplied arguments.

        Each element of args reaches the script as one separate argument.
        The former shell string joined the arguments with "' '", which the
        shell reads as a single word, and left them open to shell
        interpretation.

       @param args: extra command line arguments for dirac-proxy-info
       @type args: strings
       @return: None
    """
    # Imported here because this block cannot see the module-level imports
    import subprocess

    command = ['dirac-proxy-info', '-o', 'LogLevel=NOTICE']
    command.extend(args)
    try:
      subprocess.call(command)
    except OSError as x:
      # os.system() never raised when the script could not be started
      self.log.error('Could not execute dirac-proxy-info', str(x))
  #############################################################################

  def gridWeather(self, printOutput=False):
    """This method gives a snapshot of the current Grid weather from the perspective
       of the DIRAC site and SE masks.  Tier-1 sites are returned with more detailed
       information.

       Sites whose MoUTierLevel in the CS is 0 or 1 are recorded in self.tier1s
       (each site only once, even when this method is called repeatedly).

       The returned summary is the same whether or not printOutput is set: the
       site lists under 'Sites' always contain the Tier-1 sites as well.

       Example usage:

       >>> print dirac.gridWeather()
       {'OK': True, 'Value': {'Sites':<siteInfo>,'SEs':<seInfo>,'Tier-1s':<tierInfo>}}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """

    lcgSites = gConfig.getSections('/Resources/Sites/LCG')
    if not lcgSites['OK']:
      return lcgSites

    for lcgSite in lcgSites['Value']:
      tier = gConfig.getValue('/Resources/Sites/LCG/%s/MoUTierLevel' % lcgSite, 2)
      # self.tier1s is never reset here, so skip the sites already recorded by
      # a previous call instead of accumulating duplicates
      if tier in (0, 1) and lcgSite not in self.tier1s:
        self.tier1s.append(lcgSite)

    siteInfo = self.checkSites()
    if not siteInfo['OK']:
      return siteInfo
    siteInfo = siteInfo['Value']

    seInfo = self.checkSEs()
    if not seInfo['OK']:
      return seInfo
    seInfo = seInfo['Value']

    tierSEs = {}
    for site in self.tier1s:
      res = getSEsForSite(site)
      if not res['OK']:
        # Report the failure rather than crashing on the missing 'Value' key
        self.log.error('Failed to get the SEs for site %s' % site, res['Message'])
        return res
      tierSEs[site] = res['Value']

    tierInfo = {}
    for site, seList in tierSEs.items():
      tierInfo[site] = {}
      for se in seList:
        # SEs for which no status could be obtained are simply left out
        if se in seInfo:
          tierInfo[site][se] = seInfo[se]
      if site in siteInfo['AllowedSites']:
        tierInfo[site]['MaskStatus'] = 'Allowed'
      else:
        tierInfo[site]['MaskStatus'] = 'Banned'

    if printOutput:
      self.log.notice('========> Tier-1 status in DIRAC site and SE masks')
      for site in sorted(self.tier1s):
        self.log.notice('\n====> %s is %s in site mask\n' % (site, tierInfo[site]['MaskStatus']))
        self.log.notice('%s %s %s' % ('Storage Element'.ljust(25), 'Read Status'.rjust(15), 'Write Status'.rjust(15)))
        for se in sorted(tierSEs[site]):
          if se in tierInfo[site]:
            self.log.notice('%s %s %s' % (se.ljust(25),
                                          tierInfo[site][se]['ReadStatus'].rjust(15),
                                          tierInfo[site][se]['WriteStatus'].rjust(15))
                            )

      self.log.notice('\n========> Tier-2 status in DIRAC site mask\n')
      # Count the non Tier-1 sites without touching the lists of siteInfo, which
      # are part of the returned summary and must not depend on printOutput
      tier1Sites = set(self.tier1s)
      nAllowed = len([site for site in siteInfo['AllowedSites'] if site not in tier1Sites])
      nBanned = len([site for site in siteInfo['BannedSites'] if site not in tier1Sites])
      self.log.notice(' %s sites are in the site mask, %s are banned.\n' % (nAllowed, nBanned))

    summary = {'Sites': siteInfo, 'SEs': seInfo, 'Tier-1s': tierInfo}
    return S_OK(summary)

  #############################################################################
  def checkSites(self, printOutput=False):  # pylint: disable=no-self-use
    """Report which sites are in the DIRAC site mask and which ones are banned.

       A site is considered banned when it is returned by getSites() but is
       absent from the site mask returned by DiracAdmin.

       Example usage:

       >>> print dirac.checkSites()
       {'OK': True, 'Value': {'AllowedSites':['<Site>',...],'BannedSites':[]}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """

    allSites = getSites()
    if not allSites['OK']:
      self.log.error('Could not get list of sites from CS', allSites['Message'])
      return allSites

    siteMask = DiracAdmin().getSiteMask()
    if not siteMask['OK']:
      return siteMask
    allowed = siteMask['Value']

    # Everything known to the CS that is not in the mask is banned
    banned = [site for site in allSites['Value'] if site not in allowed]
    summary = {'AllowedSites': allowed, 'BannedSites': banned}

    if not printOutput:
      return S_OK(summary)

    self.log.notice('\n========> Allowed Sites\n')
    self.log.notice('\n'.join(allowed))
    self.log.notice('\n========> Banned Sites\n')
    self.log.notice('\n'.join(banned))
    counts = (len(allowed), len(banned))
    self.log.notice('\nThere is a total of %s allowed sites and %s banned sites in the system.' % counts)

    return S_OK(summary)

  #############################################################################
  def checkSEs(self, printOutput=False):  # pylint: disable=no-self-use
    """Check the status of read and write operations in the DIRAC SE mask.

       The storage elements are the sections of /Resources/StorageElements in
       the CS. SEs whose status cannot be obtained are logged and left out of
       the result. A missing ReadAccess or WriteAccess status is reported as
       'Active'.

       Example usage:

       >>> print dirac.checkSEs()
       {'OK': True, 'Value': {<SE>:{'ReadStatus':'<Status>','WriteStatus':'<Status>'},...}}

       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    res = gConfig.getSections('/Resources/StorageElements', True)

    if not res['OK']:
      self.log.error('Failed to get storage element information', res['Message'])
      return res

    if printOutput:
      self.log.notice('%s %s %s' % ('Storage Element'.ljust(25), 'Read Status'.rjust(15), 'Write Status'.rjust(15)))

    result = {}
    rss = ResourceStatus()
    for se in sorted(res['Value']):
      statusRes = rss.getElementStatus(se, 'StorageElement')
      if not statusRes['OK']:
        self.log.error("Failed to get StorageElement status for %s" % se, statusRes['Message'])
        continue

      seStatus = statusRes['Value']
      # NOTE(review): ResourceStatus.getElementStatus is documented by DIRAC as
      # returning the statuses keyed by element name ({<SE>: {'ReadAccess': ...}});
      # confirm against the deployed DIRAC version. Both that shape and a flat
      # {'ReadAccess': ...} dictionary are accepted here, otherwise every SE
      # would silently be reported with the 'Active' default.
      if isinstance(seStatus.get(se), dict):
        seStatus = seStatus[se]

      readState = seStatus.get('ReadAccess', 'Active')
      writeState = seStatus.get('WriteAccess', 'Active')
      result[se] = {'ReadStatus': readState, 'WriteStatus': writeState}
      if printOutput:
        self.log.notice('%s %s %s' % (se.ljust(25), readState.rjust(15), writeState.rjust(15)))

    return S_OK(result)

  def splitInputDataBySize(self, lfns, maxSizePerJob=20, printOutput=False):
    """Split the supplied lfn list by the replicas present at the possible
       destination sites, based on a maximum size.
       An S_OK object will be returned containing a list of
       lists in order to create the jobs.

       Files are first grouped by the set of sites that can access their
       replicas, then each group is cut in bunches whose total size stays below
       maxSizePerJob. A file that is itself as large as maxSizePerJob, or whose
       size is not found in the metadata, is put alone in its own bunch.

       Example usage:

       >>> d.splitInputDataBySize(lfns,10)
       {'OK': True, 'Value': [['<LFN>'], ['<LFN>']]}


       @param lfns: Logical File Name(s) to split
       @type lfns: list
       @param maxSizePerJob: Maximum size (in GB) per bunch
       @type maxSizePerJob: integer
       @param printOutput: Optional flag to print result
       @type printOutput: boolean
       @return: S_OK,S_ERROR
    """
    if isinstance(lfns, str):
      lfns = [lfns.replace('LFN:', '')]
    elif isinstance(lfns, list):
      try:
        lfns = [str(lfn.replace('LFN:', '')) for lfn in lfns]
      except (AttributeError, TypeError) as x:
        # A non-string item has no replace() method: that is an AttributeError
        return self._errorReport(str(x), 'Expected strings for LFNs')
    else:
      return self._errorReport('Expected single string or list of strings for LFN(s)')

    if not isinstance(maxSizePerJob, int):
      try:
        maxSizePerJob = int(maxSizePerJob)
      except (TypeError, ValueError) as x:
        return self._errorReport(str(x), 'Expected integer for maxSizePerJob')
    # From here on the maximum size is expressed in bytes
    maxSizePerJob *= 1000 * 1000 * 1000

    replicaDict = self.getReplicas(lfns)
    if not replicaDict['OK']:
      return replicaDict
    replicas = replicaDict['Value']['Successful']
    if not replicas:
      failed = replicaDict['Value']['Failed']
      # Report the first failure, if any (items() is not indexable everywhere)
      error = list(failed.items())[0] if failed else 'No replicas found'
      return self._errorReport(error, 'Failed to get replica information')

    # Group the files by the (sorted, comma separated) list of sites from which
    # their replicas can be accessed
    sitesForSE = {}
    siteLfns = {}
    for lfn, reps in replicas.items():
      possibleSites = set()
      for se in reps:
        # Query the sites of each SE only once
        if se not in sitesForSE:
          sitesForSE[se] = getSitesForSE(se).get('Value', [])
        possibleSites.update(sitesForSE[se])
      siteLfns.setdefault(','.join(sorted(possibleSites)), []).append(lfn)

    if '' in siteLfns:
      # Some files don't have active replicas
      return self._errorReport('No active replica found for', str(siteLfns['']))
    # Get size of files
    metadataDict = self.getLfnMetadata(lfns, printOutput)
    if not metadataDict['OK']:
      return metadataDict
    successful = metadataDict['Value']['Successful']
    # Files without a known size are assumed to fill a whole bunch
    fileSizes = dict((lfn, successful.get(lfn, {}).get('Size', maxSizePerJob)) for lfn in lfns)

    lfnGroups = []
    for files in siteLfns.values():
      # Now get bunches of files,
      # Sort in decreasing size
      files.sort(key=lambda lfn: fileSizes[lfn], reverse=True)
      while files:
        group = []
        sizeTot = 0
        # Iterate over a copy as the files are removed once assigned to a bunch
        for lfn in list(files):
          size = fileSizes[lfn]
          if size >= maxSizePerJob:
            # Too large to share a bunch. It must be removed as well, otherwise
            # the enclosing while loop would never terminate
            lfnGroups.append([lfn])
            files.remove(lfn)
          elif sizeTot + size < maxSizePerJob:
            sizeTot += size
            group.append(lfn)
            files.remove(lfn)
        if group:
          lfnGroups.append(group)

    if printOutput:
      self.log.notice(self.pPrint.pformat(lfnGroups))
    return S_OK(lfnGroups)

    #############################################################################

  def getAccessURL(self, lfn, storageElement, protocol=None, printOutput=False):
    """Retrieve an access URL for the replica of one or more LFNs at a given
       DIRAC SE.

       The LFN argument is validated with _checkFileArgument, a single LFN is
       wrapped in a list, and the request is delegated to the module-level
       getAccessURL function.

       Example Usage:

       >>> print dirac.getAccessURL('/lhcb/data/CCRC08/DST/00000151/0000/00000151_00004848_2.dst','CERN-RAW')
       {'OK': True, 'Value': {'Successful': {'srm://...': {'SRM2': 'rfio://...'}}, 'Failed': {}}}

       :param lfn: Logical File Name (LFN)
       :type lfn: str or python:list
       :param storageElement: DIRAC SE name e.g. CERN-RAW
       :type storageElement: string
       :param protocol: Optional protocol, passed through unchanged to getAccessURL
       :param printOutput: Optional flag to print result
       :type printOutput: boolean
       :returns: S_OK,S_ERROR
    """
    checked = self._checkFileArgument(lfn, 'LFN')
    if not checked['OK']:
      return checked

    # Always hand over a list of LFNs, even for a single file
    checkedLfn = checked['Value']
    lfnList = [checkedLfn] if isinstance(checkedLfn, basestring) else checkedLfn

    urls = getAccessURL(lfnList, storageElement, protocol=protocol)
    if printOutput:
      printDMResult(urls, empty="File not at SE", script="dirac-dms-lfn-accessURL")
    return urls

  #############################################################################

  def _getLocalInputData(self, parameters):
    """ LHCb extension of DIRAC API's _getLocalInputData. Only used for handling ancestors.

        The 'InputData' parameter (a list of LFNs, or a string of LFNs separated
        by ';') is cleaned of its 'LFN:' prefixes. When the 'AncestorDepth'
        parameter is a non-zero integer, the ancestors found in the Bookkeeping
        for these files are appended to the list.

        @param parameters: job parameters, read for 'InputData' and 'AncestorDepth'
        @type parameters: dictionary
        @return: S_OK( list of LFNs ) or S_ERROR. When there is no input data the
                 (empty) 'InputData' value is returned as it is in the S_OK
    """
    inputData = parameters.get('InputData')
    if not inputData:
      return S_OK(inputData)

    self.log.debug("DiracLHCb._getLocalInputData. InputData: %s" % inputData)
    if isinstance(inputData, basestring):
      inputData = inputData.split(';')
    # Remove the 'LFN:' prefix only. str.strip('LFN:') strips any of the
    # characters L, F, N and ':' from both ends, which truncates the LFNs whose
    # name ends with one of them.
    prefix = 'LFN:'
    inputData = [lfn[len(prefix):] if lfn.startswith(prefix) else lfn for lfn in inputData]

    ancestorsDepth = int(parameters.get('AncestorDepth', 0))
    if ancestorsDepth:
      self.log.debug("DiracLHCb._getLocalInputData. ancestorsDepth: %d" % ancestorsDepth)
      res = self._bkClient.getFileAncestors(inputData, ancestorsDepth)
      if not res['OK']:
        self.log.error("Can't get ancestors", res['Message'])
        return res
      ancestorsLFNs = []
      for ancestors in res['Value']['Successful'].values():
        ancestorsLFNs.extend(ancestor['FileName'] for ancestor in ancestors)
      self.log.info("DiracLHCb._getLocalInputData: adding %d ancestors" % len(ancestorsLFNs))
      # Log the list itself: a "%s" first argument would be printed literally
      self.log.verbose(', '.join(ancestorsLFNs))
      inputData += ancestorsLFNs

    return S_OK(inputData)