def getFileAncestors(inputData, ancestorDepth):
    """Query the Bookkeeping for the ancestors of the supplied input files.

    @param inputData: a single LFN or a list of LFNs; any 'LFN:' marker is removed
    @type inputData: string or list
    @param ancestorDepth: ancestor depth, handed to the Bookkeeping client unchanged
    @type ancestorDepth: integer (or string that converts to an integer)
    @return: S_OK({inputFile1: [ancestor1, ...], ...}) or S_ERROR(<Message>)
    """
    if not isinstance(inputData, list):
        inputData = [inputData]
    inputData = [lfn.replace('LFN:', '') for lfn in inputData]

    bk = BookkeepingClient()
    result = bk.getFileAncestors(inputData, depth=ancestorDepth, replica=True)
    gLogger.debug(result)
    if not result['OK']:
        gLogger.warn('Problem during getAncestors call:\n%s' % (result['Message']))
        return result

    data = result['Value']
    if data['Failed']:
        return S_ERROR('No ancestors found for the following files:\n%s' % ('\n'.join(data['Failed'])))

    # The previous check compared the return values of two list.sort() calls,
    # i.e. None == None, so a missing file was never detected.  Compare the
    # actual sets of LFNs instead (this also leaves both containers unmodified).
    missing = set(inputData) - set(data['Successful'])
    if missing:
        gLogger.warn('Not all ancestors returned after getAncestors call:\n%s' % result)
        return S_ERROR('Not all ancestors returned after getAncestors call')

    return S_OK(data['Successful'])
class AncestorFilesAgent(OptimizerModule):
    """Optimizer module that extends a job's input data with the ancestor
    files returned by the Bookkeeping for the requested ancestor depth.
    """

    def __init__(self, agentName, loadName, baseAgentName=False, properties=None):
        """ c'tor

        @param properties: optional dictionary forwarded to OptimizerModule; a
                           fresh dictionary is created per instance when omitted
                           (a literal {} default would be shared by all instances)
        """
        if properties is None:
            properties = {}
        OptimizerModule.__init__(self, agentName, loadName, baseAgentName, properties)
        self.bk = BookkeepingClient()

    #############################################################################
    def checkJob(self, job, classadJob):
        """ The main agent execution method.

        Resolves the ancestors for the job and, on success, hands the job to
        the next optimizer.

        @return: S_ERROR from the ancestor check, or the result of setNextOptimizer()
        """
        result = self.__checkAncestorDepth(job, classadJob)
        if not result['OK']:
            return result
        return self.setNextOptimizer(job)

    #############################################################################
    def __checkAncestorDepth(self, job, classadJob):
        """ Check the input data with ancestors and update the job accordingly.

        Note from the original implementation: the original job JDL is always
        used to obtain the input data, therefore rescheduled jobs will not
        recursively search for ancestors of ancestors etc.

        @return: S_ERROR if AncestorDepth is missing or any step fails,
                 S_OK('Null AncestorDepth specified') for a depth of 0,
                 otherwise the result of updating the job input data and JDL
        """
        inputData = []
        if classadJob.lookupAttribute('InputData'):
            inputData = classadJob.getListFromExpression('InputData')

        if not classadJob.lookupAttribute('AncestorDepth'):
            self.log.warn('No AncestorDepth requirement found for job %s' % (job))
            return S_ERROR('AncestorDepth Not Found')

        ancestorDepth = classadJob.getAttributeInt('AncestorDepth')
        if ancestorDepth == 0:
            return S_OK('Null AncestorDepth specified')

        self.log.info('Job %s has %s input data files and specified ancestor depth of %s'
                      % (job, len(inputData), ancestorDepth))
        result = self.__getInputDataWithAncestors(job, inputData, ancestorDepth)
        if not result['OK']:
            return result

        newInputData = result['Value']
        classadJob.insertAttributeVectorString('InputData', newInputData)
        newJDL = classadJob.asJDL()
        return self.__setJobInputData(job, newJDL, newInputData)

    ############################################################################
    def __getInputDataWithAncestors(self, job, inputData, ancestorDepth):
        """ Extend the list of LFNs with the LFNs of their ancestor files for
        the generation depth specified in the job JDL.

        @return: S_OK([ancestor LFNs followed by the input LFNs]) or S_ERROR
        """
        inputData = [lfn.replace('LFN:', '') for lfn in inputData]
        optimizerName = self.am_getModuleParam('optimizerName')

        start = time.time()
        try:
            result = self.bk.getFileAncestors(inputData, ancestorDepth, replica=True)
        except Exception as x:
            # Deliberately broad: any client failure is reported as an S_ERROR
            self.log.warn('getFileAncestors failed with exception:\n%s' % x)
            return S_ERROR('getFileAncestors failed with exception')
        self.log.info('BK lookup time %.2f s' % (time.time() - start))
        self.log.debug(result)

        if not result['OK']:
            # Record the BK failure as a job parameter before giving up
            report = self.setJobParam(job, optimizerName, result['Message'])
            if not report['OK']:
                self.log.warn(report['Message'])
            self.log.warn(result['Message'])
            return S_ERROR('No Ancestors Found For Input Data')

        ancestors = [anc['FileName']
                     for ancList in result['Value']['Successful'].values()
                     for anc in ancList]
        newInputData = ancestors + inputData

        param = '%d ancestor files retrieved from BK for depth %s' % (len(ancestors), ancestorDepth)
        report = self.setJobParam(job, optimizerName, param)
        if not report['OK']:
            self.log.warn(report['Message'])

        return S_OK(newInputData)

    def __setJobInputData(self, job, jdl, inputData):
        """ Set the new job input data requirement including ancestor files.

        The input data is stored first and the JDL second; the first failure
        is logged and reported.

        @return: S_OK('Job updated') or S_ERROR
        """
        inputData = [lfn.replace('LFN:', '') for lfn in inputData]

        result = self.jobDB.setInputData(job, inputData)
        if not result['OK']:
            self.log.error(result['Message'])
            return S_ERROR('Setting New Input Data')

        result = self.jobDB.setJobJDL(job, jdl)
        if not result['OK']:
            self.log.error(result['Message'])
            return S_ERROR('Setting New JDL')

        return S_OK('Job updated')
class DiracLHCb(Dirac):
    """LHCb extension of the Dirac API class: adds Bookkeeping (BK) queries,
    site / storage element status summaries and input data helpers.
    """

    #############################################################################
    def __init__(self, withRepo=False, repoLocation='', operationsHelperIn=None):
        """Internal initialization of the DIRAC API.

        @param operationsHelperIn: optional Operations helper; a new Operations()
                                   is created when none is supplied
        """
        super(DiracLHCb, self).__init__(withRepo=withRepo, repoLocation=repoLocation)
        self.tier1s = []

        self.opsH = operationsHelperIn if operationsHelperIn else Operations()

        # Defaults for every field accepted by bkQuery()
        self._bkQueryTemplate = {'SimulationConditions': 'All',
                                 'DataTakingConditions': 'All',
                                 'ProcessingPass': '******',
                                 'FileType': 'All',
                                 'EventType': 'All',
                                 'ConfigName': 'All',
                                 'ConfigVersion': 'All',
                                 'Production': 0,
                                 'StartRun': 0,
                                 'EndRun': 0,
                                 'DataQuality': 'All',
                                 'Visible': 'Yes'}
        self._bkClient = BookkeepingClient()  # to expose all BK client methods indirectly

    #############################################################################
    def addRootFile(self, lfn, fullPath, diracSE, printOutput=False):
        """ Add a Root file to Grid storage, an attempt is made to retrieve the
        POOL GUID of the file prior to upload.

        Example Usage:

        >>> print dirac.addFile('/lhcb/user/p/paterson/myRootFile.tar.gz','myFile.tar.gz','CERN-USER')
        {'OK': True, 'Value':{'Failed': {},
         'Successful': {'/lhcb/user/p/paterson/test/myRootFile.tar.gz': {'put': 64.246301889419556,
                                                                     'register': 1.1102778911590576}}}}

        @param lfn: Logical File Name (LFN)
        @type lfn: string
        @param fullPath: local path of the file, also used to obtain its GUID
        @type fullPath: string
        @param diracSE: DIRAC SE name e.g. CERN-USER
        @type diracSE: string
        @param printOutput: Optional flag to print result
        @type printOutput: boolean
        @return: S_OK,S_ERROR
        """
        return super(DiracLHCb, self).addFile(lfn, fullPath, diracSE,
                                              fileGuid=makeGuid(fullPath)[fullPath],
                                              printOutput=printOutput)

    def addFile(self, lfn, fullPath, diracSE, printOutput=False):  # pylint: disable=arguments-differ
        """ Copy of addRootFile: uploads the file with the GUID obtained from makeGuid().
        """
        return super(DiracLHCb, self).addFile(lfn, fullPath, diracSE,
                                              fileGuid=makeGuid(fullPath)[fullPath],
                                              printOutput=printOutput)

    def getBKAncestors(self, lfns, depth=1, replica=True):
        """ This function allows to retrieve ancestor files from the Bookkeeping.

        Example Usage:

        >>> dirac.getBKAncestors('/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst',2)
        {'OK': True, 'Value': ['/lhcb/data/2009/DST/00005727/0000/00005727_00000042_1.dst',
        '/lhcb/data/2009/RAW/FULL/LHCb/COLLISION09/63807/063807_0000000004.raw']}

        @param lfns: Logical File Name(s) (LFN)
        @type lfns: string or list
        @param depth: Ancestor depth
        @type depth: integer
        @param replica: forwarded to the Bookkeeping client
        @type replica: boolean
        @return: S_OK(input LFNs followed by the distinct ancestor LFNs) or S_ERROR
        """
        result = self._bkClient.getFileAncestors(lfns, depth, replica=replica)
        if not result['OK']:
            self.log.error('Could not get ancestors', result['Message'])
            return result

        ancestors = set(anc['FileName']
                        for ancList in result['Value']['Successful'].values()
                        for anc in ancList)
        # A single LFN string is a documented input: wrap it so that the
        # concatenation below does not fail with "str + list".
        if isinstance(lfns, (list, tuple, set)):
            lfnList = list(lfns)
        else:
            lfnList = [lfns]
        return S_OK(lfnList + sorted(ancestors))

    #############################################################################
    def bkQueryRunsByDate(self, bkPath, startDate, endDate, dqFlag='All', selection='Runs'):
        """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<ConfigurationName>/<Configuration Version>/<Condition Description>/<Processing Pass>/<Event Type>/<File Type>

        so an example for 2016 collisions data would be:

        /LHCb/Collision16/Beam6500GeV-VeloClosed-MagDown/Real Data/Reco16/Stripping26/90000000/EW.DST

        The startDate and endDate must be specified as yyyy-mm-dd.

        Runs can be selected based on their status e.g. the selection parameter
        has the following possible attributes:
         - Runs - data for all runs in the range are queried (default)
         - ProcessedRuns - data is retrieved for runs that are processed
         - NotProcessed - data is retrieved for runs that are not yet processed.

        Example Usage:

        >>> dirac.bkQueryRunsByDate('/LHCb/Collision16//Real Data/90000000/RAW',
                                    '2016-08-20','2016-08-22',dqFlag='OK',selection='Runs')
        {'OK': True, 'Value': [<LFN1>,<LFN2>]}

        dirac.bkQueryRunsByDate('/LHCb/Collision16/Beam6500GeV-VeloClosed-MagDown/Real'
                                'Data/Reco16/Stripping26/90000000/EW.DST',
                                '2016-08-20','2016-08-22',dqFlag='OK',selection='Runs')

        @param bkPath: BK path as described above
        @type bkPath: string
        @param dqFlag: Optional Data Quality flag
        @type dqFlag: string
        @param startDate: Start date yyyy-mm-dd
        @type startDate: string
        @param endDate: End date yyyy-mm-dd
        @type endDate: string
        @param selection: Either Runs, ProcessedRuns or NotProcessed
        @type selection: string
        @return: S_OK,S_ERROR
        """
        runSelection = ['Runs', 'ProcessedRuns', 'NotProcessed']
        if selection not in runSelection:
            return S_ERROR('Expected one of %s not "%s" for selection' % (', '.join(runSelection), selection))

        if not isinstance(bkPath, str):
            return S_ERROR('Expected string for bkPath')

        # remove any double slashes, spaces must be preserved
        # remove any empty components from leading and trailing slashes
        bkQuery = BKQuery().buildBKQuery(bkPath)
        if not bkQuery:
            return S_ERROR(
                'Please provide a BK path: '
                '/<ConfigurationName>/<Configuration Version>/<Condition Description>/<Processing Pass>'
                '/<Event Type>/<File Type>')

        if not startDate or not endDate:
            return S_ERROR('Expected both start and end dates to be defined in format: yyyy-mm-dd')

        if not isinstance(startDate, str) or not isinstance(endDate, str):
            return S_ERROR('Expected yyyy-mm-dd string for start and end dates')

        if len(startDate.split('-')) != 3 or len(endDate.split('-')) != 3:
            return S_ERROR('Expected yyyy-mm-dd string for start and end dates')

        start = time.time()
        result = self._bkClient.getRunsForAGivenPeriod({'StartDate': startDate, 'EndDate': endDate})
        rtime = time.time() - start
        self.log.info('BK query time: %.2f sec' % rtime)
        if not result['OK']:
            self.log.info('Could not get runs with given dates from BK with result: "%s"' % result)
            return result

        if not result['Value']:
            self.log.info('No runs selected from BK for specified dates')
            return result

        if selection not in result['Value']:
            return S_ERROR('No %s runs for specified dates' % (selection))

        runs = result['Value'][selection]
        self.log.info('Found the following %s runs:\n%s' % (len(runs), ', '.join([str(i) for i in runs])))

        # The DQ flags do not depend on the run: validate them once (this needs a
        # BK call) instead of once per run.  Only done when there is a run to
        # query, so an empty run list still yields an empty selection.
        dataQuality = None
        if dqFlag and runs:
            check = self.__checkDQFlags(dqFlag)
            if not check['OK']:
                return check
            dataQuality = check['Value']

        checkRunStatus = selection in ('ProcessedRuns', 'NotProcessed')

        # temporary until we can query for a discrete list of runs
        selectedData = []
        for run in runs:
            query = bkQuery.copy()
            query['StartRun'] = run
            query['EndRun'] = run
            query['CheckRunStatus'] = checkRunStatus
            if dataQuality is not None:
                query['DataQuality'] = dataQuality

            start = time.time()
            result = self._bkClient.getVisibleFilesWithMetadata(query)
            rtime = time.time() - start
            self.log.info('BK query time: %.2f sec' % rtime)
            self.log.verbose(result)
            if not result['OK']:
                return result

            # Count the files, not the keys of the returned dictionary
            runLFNs = result['Value']['LFNs']
            self.log.info('Selected %s files for run %s' % (len(runLFNs) if runLFNs else 0, run))
            if runLFNs:
                selectedData.extend(runLFNs.keys())

        self.log.info('Total files selected = %s' % (len(selectedData)))
        return S_OK(selectedData)

    #############################################################################
    def bkQueryRun(self, bkPath, dqFlag='All'):
        """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<Run Number>/<Processing Pass>/<Event Type>/<File Type>

        so an example for 2009 collisions data would be:

        /63566/Real Data + RecoToDST-07/90000000/DST

        In addition users can specify a range of runs using the following convention:

        /<Run Number 1> - <Run Number 2>/<Processing Pass>/<Event Type>/<File Type>

        so extending the above example this would look like:

        /63566-63600/Real Data + RecoToDST-07/90000000/DST

        Example Usage:

        >>> dirac.bkQueryRun('/63566/Real Data/RecoToDST-07/90000000/DST')
        {'OK':True,'Value': ['/lhcb/data/2009/DST/00005842/0000/00005842_00000008_1.dst']}

        @param bkPath: BK path as described above
        @type bkPath: string
        @param dqFlag: Optional Data Quality flag
        @type dqFlag: string
        @return: S_OK,S_ERROR
        """
        if not isinstance(bkPath, str):
            return S_ERROR('Expected string for bkPath')

        # remove any double slashes, spaces must be preserved
        # remove any empty components from leading and trailing slashes
        bkPath = translateBKPath(bkPath, procPassID=1)
        if len(bkPath) != 4:
            return S_ERROR('Expected 4 components to the BK path: /<Run Number>/<Processing Pass>/<Event Type>/<File Type>')

        runNumberString = bkPath[0].replace('--', '-').replace(' ', '')
        if '-' in runNumberString:
            runs = runNumberString.split('-')
            if len(runs) != 2:
                return S_ERROR('Could not determine run range from "%s", try "<Run 1> - <Run2>"' % (runNumberString))
            try:
                first = int(runs[0])
                last = int(runs[1])
            except (TypeError, ValueError):
                return S_ERROR('Invalid run range: %s' % runNumberString)
            # Accept the two bounds in either order
            startRun = min(first, last)
            endRun = max(first, last)
        else:
            try:
                startRun = int(runNumberString)
            except (TypeError, ValueError):
                return S_ERROR('Invalid run number: %s' % runNumberString)
            endRun = startRun

        query = self._bkQueryTemplate.copy()
        query['StartRun'] = startRun
        query['EndRun'] = endRun
        query['ProcessingPass'] = bkPath[1]
        query['EventType'] = bkPath[2]
        query['FileType'] = bkPath[3]

        if dqFlag:
            check = self.__checkDQFlags(dqFlag)
            if not check['OK']:
                return check
            query['DataQuality'] = check['Value']

        result = self.bkQuery(query)
        self.log.verbose(result)
        return result

    #############################################################################
    def bkQueryProduction(self, bkPath, dqFlag='All'):
        """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<ProductionID>/[<Processing Pass>/<Event Type>/]<File Type>

        so an example for 2009 collisions data would be:

        /5842/Real Data + RecoToDST-07/90000000/DST

        Note that neither the processing pass nor the event type should be necessary,
        so either of them can be omitted.

        A data quality flag can also optionally be provided, the full list of these
        is available via the getAllDQFlags() method.

        Example Usage:

        >>> dirac.bkQueryProduction('/5842/Real Data/RecoToDST-07/90000000/DST')
        {'OK': True, 'Value': [<LFN1>,<LFN2>]}

        @param bkPath: BK path as described above
        @type bkPath: string
        @param dqFlag: Optional Data Quality flag
        @type dqFlag: string
        @return: S_OK,S_ERROR
        """
        if not isinstance(bkPath, str):
            return S_ERROR('Expected string for bkPath')

        # remove any double slashes, spaces must be preserved
        # remove any empty components from leading and trailing slashes
        bkPath = translateBKPath(bkPath, procPassID=1)
        if len(bkPath) < 2:
            return S_ERROR('Invalid bkPath: should at least contain /ProductionID/FileType')

        query = self._bkQueryTemplate.copy()
        try:
            query['Production'] = int(bkPath[0])
        except (TypeError, ValueError):
            return S_ERROR('Invalid production ID')
        # Only the first (production) and last (file type) components are used
        query['FileType'] = bkPath[-1]

        if dqFlag:
            check = self.__checkDQFlags(dqFlag)
            if not check['OK']:
                return check
            query['DataQuality'] = check['Value']

        # Drop every field left at 'All'; iterate over a snapshot because the
        # dictionary is modified inside the loop.
        for key, val in list(query.items()):
            if isinstance(val, basestring) and val.lower() == 'all':
                query.pop(key)

        result = self.bkQuery(query)
        self.log.verbose(result)
        return result

    #############################################################################
    def bkQueryPath(self, bkPath, dqFlag='All'):
        """ This function allows to create and perform a BK query given a supplied
        BK path. The following BK path convention is expected:

        /<ConfigurationName>/<Configuration Version>/<Sim or Data Taking Condition>
        /<Processing Pass>/<Event Type>/<File Type>

        so an example for 2009 collisions data would be:

        /LHCb/Collision09/Beam450GeV-VeloOpen-MagDown/Real Data + RecoToDST-07/90000000/DST

        or for MC09 simulated data:

        /MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/2010-Sim01Reco01-withTruth/27163001/DST

        A data quality flag can also optionally be provided, the full list of these
        is available via the getAllDQFlags() method.

        Example Usage:

        >>> dirac.bkQueryPath('/MC/2010/Beam3500GeV-VeloClosed-MagDown-Nu1/Sim07/Reco06-withTruth/10012004/DST')
        {'OK': True, 'Value': [<LFN1>,<LFN2>]}

        @param bkPath: BK path as described above
        @type bkPath: string
        @param dqFlag: Optional Data Quality flag
        @type dqFlag: string
        @return: S_OK,S_ERROR
        """
        if not isinstance(bkPath, str):
            return S_ERROR('Expected string for bkPath')

        # remove any double slashes, spaces must be preserved
        # remove any empty components from leading and trailing slashes
        bkPath = translateBKPath(bkPath, procPassID=3)
        if len(bkPath) != 6:
            return S_ERROR('Expected 6 components to the BK path: '
                           '/<ConfigurationName>/<Configuration Version>/<Sim or Data Taking Condition>'
                           '/<Processing Pass>/<Event Type>/<File Type>')

        query = self._bkQueryTemplate.copy()
        query['ConfigName'] = bkPath[0]
        query['ConfigVersion'] = bkPath[1]
        query['ProcessingPass'] = bkPath[3]
        query['EventType'] = bkPath[4]
        query['FileType'] = bkPath[5]

        if dqFlag:
            check = self.__checkDQFlags(dqFlag)
            if not check['OK']:
                return check
            query['DataQuality'] = check['Value']

        # The problem here is that we don't know if it's a sim or data taking condition,
        # assume that if configName=MC this is simulation
        if bkPath[0].lower() == 'mc':
            query['SimulationConditions'] = bkPath[2]
        else:
            query['DataTakingConditions'] = bkPath[2]

        result = self.bkQuery(query)
        self.log.verbose(result)
        return result

    #############################################################################
    def bookkeepingQuery(self, SimulationConditions='All', DataTakingConditions='All',
                         ProcessingPass='******', FileType='All', EventType='All',
                         ConfigName='All', ConfigVersion='All', ProductionID=0, DataQuality='ALL'):
        """ This function will create and perform a BK query using the supplied
        arguments; the query is executed by bkQuery().

        Example Usage:

        >>> dirac.bookkeepingQuery(ConfigName='LHCb',ConfigVersion='Collision09',
        EventType='90000000',ProcessingPass='******',DataTakingConditions='Beam450GeV-VeloOpen-MagDown')
        {'OK':True,'Value':<files>}

        @param ConfigName: BK ConfigName
        @type ConfigName: string
        @param EventType: BK EventType
        @type EventType: string
        @param FileType: BK FileType
        @type FileType: string
        @param ProcessingPass: BK ProcessingPass
        @type ProcessingPass: string
        @param ProductionID: BK ProductionID
        @type ProductionID: integer
        @param DataQuality: BK DataQuality
        @type DataQuality: string
        @param ConfigVersion: BK ConfigVersion
        @type ConfigVersion: string
        @param DataTakingConditions: BK DataTakingConditions
        @type DataTakingConditions: string
        @param SimulationConditions: BK SimulationConditions
        @type SimulationConditions: string
        @return: S_OK,S_ERROR
        """
        query = self._bkQueryTemplate.copy()
        query.update({'SimulationConditions': SimulationConditions,
                      'DataTakingConditions': DataTakingConditions,
                      'ProcessingPass': ProcessingPass,
                      'FileType': FileType,
                      'EventType': EventType,
                      'ConfigName': ConfigName,
                      'ConfigVersion': ConfigVersion,
                      'Production': ProductionID,
                      'DataQuality': DataQuality})
        return self.bkQuery(query)

    #############################################################################
    def bkQuery(self, bkQueryDict):
        """ Developer function. Perform a query to the LHCb Bookkeeping using a
        BK query dictionary.

        The supplied dictionary is normalised in place: numeric fields equal to 0
        and fields set to 'All' are removed, remaining numeric fields are
        converted to strings.

        Example Usage:

        >>> print dirac.bkQuery(query)
        {'OK':True,'Value':<files>}

        @param bkQueryDict: BK query
        @type bkQueryDict: dictionary (see bookkeepingQuery() for keys)
        @return: S_OK,S_ERROR
        """
        # Remove the Visible flag as anyway the method is for visible files ;-)
        # bkQueryDict.setdefault( 'Visible', 'Yes' )
        problematicFields = [name for name in bkQueryDict if name not in self._bkQueryTemplate]
        if problematicFields:
            msg = ('The following fields are not valid for a BK query: %s\nValid fields include: %s'
                   % (', '.join(problematicFields), ', '.join(self._bkQueryTemplate.keys())))
            return S_ERROR(msg)

        # Iterate over a snapshot: entries are deleted inside the loop
        for name, value in list(bkQueryDict.items()):
            if name in ('Production', 'EventType', 'StartRun', 'EndRun'):
                if value == 0:
                    del bkQueryDict[name]
                else:
                    bkQueryDict[name] = str(value)
            elif name == 'FileType':
                if value.lower() == 'all':
                    bkQueryDict[name] = 'ALL'
            elif str(value).lower() == 'all':
                del bkQueryDict[name]

        if 'Production' in bkQueryDict or 'StartRun' in bkQueryDict or 'EndRun' in bkQueryDict:
            self.log.verbose('Found a specific query so loosening some restrictions to prevent BK overloading')
        else:
            if 'SimulationConditions' not in bkQueryDict and 'DataTakingConditions' not in bkQueryDict:
                return S_ERROR('A Simulation or DataTaking Condition must be specified for a BK query.')
            # NOTE(review): the message below asks for all three fields but this
            # condition only rejects the query when all three are missing; kept
            # as is because tightening it would reject queries accepted today.
            if ('EventType' not in bkQueryDict and 'ConfigName' not in bkQueryDict
                    and 'ConfigVersion' not in bkQueryDict):
                return S_ERROR(
                    'The minimal set of BK fields for a query is: EventType, ConfigName and ConfigVersion'
                    ' in addition to a Simulation or DataTaking Condition')

        self.log.verbose('Final BK query dictionary is:')
        for item in bkQueryDict.items():
            self.log.verbose('%s : %s' % item)

        start = time.time()
        result = self._bkClient.getVisibleFilesWithMetadata(bkQueryDict)
        # result = bk.getFilesWithGivenDataSets(bkQueryDict)
        rtime = time.time() - start
        self.log.info('BK query time: %.2f sec' % rtime)

        if not result['OK']:
            return S_ERROR('BK query returned an error: "%s"' % (result['Message']))

        if not result['Value']:
            return self._errorReport('No BK files selected')

        # Report the number of files when the value carries an 'LFNs' entry (as
        # used by bkQueryRunsByDate); otherwise fall back to the container size.
        value = result['Value']
        if isinstance(value, dict) and 'LFNs' in value:
            returnedFiles = len(value['LFNs'])
        else:
            returnedFiles = len(value)
        self.log.verbose('%s files selected from the BK' % (returnedFiles))
        return result

    #############################################################################
    def __checkDQFlags(self, flags):
        """ Internal function. Checks the provided flags against the list of
        possible DQ flag statuses from the Bookkeeping.

        @param flags: a single flag or a list of flags
        @return: S_OK(upper-cased flag, or list of flags when several were
                 given) or S_ERROR for an unknown flag
        """
        dqFlags = flags if isinstance(flags, list) else [flags]

        bkFlags = self.getAllDQFlags()
        if not bkFlags['OK']:
            return bkFlags

        final = []
        for flag in dqFlags:
            flag = flag.upper()
            # 'ALL' is always accepted, anything else must be known to the BK
            if flag != 'ALL' and flag not in bkFlags['Value']:
                msg = 'Specified DQ flag "%s" is not in allowed list: %s' % (flag, ', '.join(bkFlags['Value']))
                self.log.error(msg)
                return S_ERROR(msg)
            final.append(flag)

        # when first coding this it was not possible to use a list ;)
        if len(final) == 1:
            final = final[0]

        return S_OK(final)

    #############################################################################
    def getAllDQFlags(self, printOutput=False):
        """ Helper function. Returns the list of possible DQ flag statuses
        from the Bookkeeping.

        Example Usage:

        >>> print dirac.getAllDQFlags()
        {'OK':True,'Value':<flags>}

        @param printOutput: Optional flag to print result
        @type printOutput: boolean
        @return: S_OK,S_ERROR
        """
        result = self._bkClient.getAvailableDataQuality()
        if not result['OK']:
            self.log.error('Could not obtain possible DQ flags from BK with result:\n%s' % (result))
            return result

        if printOutput:
            flags = result['Value']
            self.log.info('Possible DQ flags from BK are: %s' % (', '.join(flags)))

        return result

    #############################################################################
    def __normaliseLFNs(self, lfns):
        """ Internal function. Remove the 'LFN:' marker from a single LFN or a
        list of LFNs.

        @return: tuple (list of LFN strings, None) on success or
                 (None, error report) when the input is not usable
        """
        if isinstance(lfns, str):
            return [lfns.replace('LFN:', '')], None
        if isinstance(lfns, list):
            try:
                return [str(lfn.replace('LFN:', '')) for lfn in lfns], None
            except (AttributeError, TypeError, ValueError) as x:
                # A non-string entry has no replace() and raises AttributeError
                return None, self._errorReport(str(x), 'Expected strings for LFNs')
        return None, self._errorReport('Expected single string or list of strings for LFN(s)')

    #############################################################################
    def getDataByRun(self, lfns, printOutput=False):
        """Sort the supplied lfn list by run. An S_OK object will be returned
        containing a dictionary of runs and the corresponding list of LFN(s)
        associated with them.

        Example usage:

        >>> print dirac.getDataByRun(lfns)
        {'OK': True, 'Value': {<RUN>:['<LFN>','<LFN>',...], <RUN>:['<LFN>',..]}}

        @param lfns: Logical File Name(s)
        @type lfns: list
        @param printOutput: Optional flag to print result
        @type printOutput: boolean
        @return: S_OK,S_ERROR
        """
        lfns, error = self.__normaliseLFNs(lfns)
        if error is not None:
            return error

        start = time.time()
        result = self._bkClient.getFileMetadata(lfns)
        self.log.verbose("Obtained BK file metadata in %.2f seconds" % (time.time() - start))
        if not result['OK']:
            self.log.error('Failed to get bookkeeping metadata with result "%s"' % (result['Message']))
            return result

        runDict = {}
        for lfn, metadata in result['Value']['Successful'].items():
            if 'RunNumber' in metadata:
                runDict.setdefault(metadata['RunNumber'], []).append(lfn)
            else:
                self.log.warn('Could not find run number from BK for %s' % (lfn))

        if printOutput:
            self.log.notice(self.pPrint.pformat(runDict))

        return S_OK(runDict)

    #############################################################################
    def bkMetadata(self, lfns, printOutput=False):
        """Return metadata for the supplied lfn list. An S_OK object will be
        returned containing a dictionary of LFN(s) and the corresponding
        metadata associated with them.

        Example usage:

        >>> print dirac.bkMetadata(lfns)
        {'OK': True, 'Value': {<LFN>:{'<Name>':'<Value>',...},...}}

        @param lfns: Logical File Name(s)
        @type lfns: list
        @param printOutput: Optional flag to print result
        @type printOutput: boolean
        @return: S_OK,S_ERROR
        """
        lfns, error = self.__normaliseLFNs(lfns)
        if error is not None:
            return error

        start = time.time()
        result = self._bkClient.getFileMetadata(lfns)
        self.log.verbose("Obtained BK file metadata in %.2f seconds" % (time.time() - start))
        if not result['OK']:
            self.log.error('Failed to get bookkeeping metadata with result "%s"' % (result['Message']))
            return result

        if printOutput:
            self.log.notice(self.pPrint.pformat(result['Value']))

        return result

    #############################################################################
    def lhcbProxyInit(self, *args):  # pylint: disable=no-self-use
        """ just calling the dirac-proxy-init script

        NOTE(review): the arguments are joined with "' '" and interpolated into
        a shell command line, so several arguments end up as one shell word and
        shell metacharacters are not escaped - confirm the intended quoting
        with the callers before changing it.
        """
        os.system("dirac-proxy-init -o LogLevel=NOTICE -t --rfc %s" % "' '".join(args))

    #############################################################################
    def lhcbProxyInfo(self, *args):  # pylint: disable=no-self-use
        """ just calling the dirac-proxy-info script

        NOTE(review): the arguments are joined with "' '" and interpolated into
        a shell command line, so several arguments end up as one shell word and
        shell metacharacters are not escaped - confirm the intended quoting
        with the callers before changing it.
        """
        os.system("dirac-proxy-info -o LogLevel=NOTICE %s" % "' '".join(args))

    #############################################################################
    def gridWeather(self, printOutput=False):
        """This method gives a snapshot of the current Grid weather from the
        perspective of the DIRAC site and SE masks. Tier-1 sites are returned
        with more detailed information.

        Example usage:

        >>> print dirac.gridWeather()
        {'OK': True, 'Value': {{'Sites':<siteInfo>,'SEs':<seInfo>,'Tier-1s':<tierInfo>}}

        @param printOutput: Optional flag to print result
        @type printOutput: boolean
        @return: S_OK,S_ERROR
        """
        lcgSites = gConfig.getSections('/Resources/Sites/LCG')
        if not lcgSites['OK']:
            return lcgSites

        for lcgSite in lcgSites['Value']:
            tier = gConfig.getValue('/Resources/Sites/LCG/%s/MoUTierLevel' % lcgSite, 2)
            # self.tier1s outlives the call: do not add a site twice when the
            # method is called repeatedly
            if tier in (0, 1) and lcgSite not in self.tier1s:
                self.tier1s.append(lcgSite)

        siteInfo = self.checkSites()
        if not siteInfo['OK']:
            return siteInfo
        siteInfo = siteInfo['Value']

        seInfo = self.checkSEs()
        if not seInfo['OK']:
            return seInfo
        seInfo = seInfo['Value']

        tierSEs = {}
        for site in self.tier1s:
            sesForSite = getSEsForSite(site)
            if not sesForSite['OK']:
                return sesForSite
            tierSEs[site] = sesForSite['Value']

        tierInfo = {}
        for site, seList in tierSEs.items():
            tierInfo[site] = {}
            for se in seList:
                if se in seInfo:
                    tierInfo[site][se] = seInfo[se]
            if site in siteInfo['AllowedSites']:
                tierInfo[site]['MaskStatus'] = 'Allowed'
            else:
                tierInfo[site]['MaskStatus'] = 'Banned'

        if printOutput:
            self.log.notice('========> Tier-1 status in DIRAC site and SE masks')
            for site in sorted(self.tier1s):
                self.log.notice('\n====> %s is %s in site mask\n' % (site, tierInfo[site]['MaskStatus']))
                self.log.notice('%s %s %s' % ('Storage Element'.ljust(25),
                                              'Read Status'.rjust(15),
                                              'Write Status'.rjust(15)))
                for se in sorted(tierSEs[site]):
                    if se in tierInfo[site]:
                        self.log.notice('%s %s %s' % (se.ljust(25),
                                                      tierInfo[site][se]['ReadStatus'].rjust(15),
                                                      tierInfo[site][se]['WriteStatus'].rjust(15))
                                        )

            self.log.notice('\n========> Tier-2 status in DIRAC site mask\n')
            # Count the non Tier-1 sites without touching the lists that are
            # returned in the summary, so that the result does not depend on
            # printOutput.
            tier1Sites = set(self.tier1s)
            allowedSites = [site for site in siteInfo['AllowedSites'] if site not in tier1Sites]
            bannedSites = [site for site in siteInfo['BannedSites'] if site not in tier1Sites]
            self.log.notice(' %s sites are in the site mask, %s are banned.\n' % (len(allowedSites), len(bannedSites)))

        summary = {'Sites': siteInfo, 'SEs': seInfo, 'Tier-1s': tierInfo}
        return S_OK(summary)

    #############################################################################
    def checkSites(self, printOutput=False):  # pylint: disable=no-self-use
        """Return the list of sites in the DIRAC site mask and those which are banned.

        Example usage:

        >>> print dirac.checkSites()
        {'OK': True, 'Value': {'AllowedSites':['<Site>',...],'BannedSites':[]}

        @param printOutput: Optional flag to print result
        @type printOutput: boolean
        @return: S_OK,S_ERROR
        """
        res = getSites()
        if not res['OK']:
            self.log.error('Could not get list of sites from CS', res['Message'])
            return res
        totalList = res['Value']

        res = DiracAdmin().getSiteMask()
        if not res['OK']:
            return res
        sites = res['Value']

        # Every known site that is absent from the site mask counts as banned
        maskedIn = set(sites)
        bannedSites = [site for site in totalList if site not in maskedIn]

        if printOutput:
            self.log.notice('\n========> Allowed Sites\n')
            self.log.notice('\n'.join(sites))
            self.log.notice('\n========> Banned Sites\n')
            self.log.notice('\n'.join(bannedSites))
            self.log.notice('\nThere is a total of %s allowed sites and %s banned sites in the system.'
                            % (len(sites), len(bannedSites)))

        return S_OK({'AllowedSites': sites, 'BannedSites': bannedSites})

    #############################################################################
    def checkSEs(self, printOutput=False):  # pylint: disable=no-self-use
        """Check the status of read and write operations in the DIRAC SE mask.

        Example usage:

        >>> print dirac.checkSEs()
        {'OK': True, 'Value': {<SE>:{'ReadStatus':'<Status>','WriteStatus':'<Status>'},...}}

        @param printOutput: Optional flag to print result
        @type printOutput: boolean
        @return: S_OK,S_ERROR
        """
        res = gConfig.getSections('/Resources/StorageElements', True)
        if not res['OK']:
            self.log.error('Failed to get storage element information', res['Message'])
            return res

        if printOutput:
            self.log.notice('%s %s %s' % ('Storage Element'.ljust(25),
                                          'Read Status'.rjust(15),
                                          'Write Status'.rjust(15)))

        result = {}
        rss = ResourceStatus()
        for se in sorted(res['Value']):
            res = rss.getElementStatus(se, 'StorageElement')
            if not res['OK']:
                # Best effort: a storage element without status is only logged
                self.log.error("Failed to get StorageElement status for %s" % se)
                continue

            readState = res['Value'].get('ReadAccess', 'Active')
            writeState = res['Value'].get('WriteAccess', 'Active')
            result[se] = {'ReadStatus': readState, 'WriteStatus': writeState}
            if printOutput:
                self.log.notice('%s %s %s' % (se.ljust(25), readState.rjust(15), writeState.rjust(15)))

        return S_OK(result)

    def splitInputDataBySize(self, lfns, maxSizePerJob=20, printOutput=False):
        """Split the supplied lfn list by the replicas present at the possible
        destination sites, based on a maximum size.
        An S_OK object will be returned containing a list of
        lists in order to create the jobs.

        Example usage:

        >>> d.splitInputDataBySize(lfns,10)
        {'OK': True, 'Value': [['<LFN>'], ['<LFN>']]}

        @param lfns: Logical File Name(s) to split
        @type lfns: list
        @param maxSizePerJob: Maximum size (in GB) per bunch
        @type maxSizePerJob: integer
        @param printOutput: Optional flag to print result
        @type printOutput: boolean
        @return: S_OK,S_ERROR
        """
        lfns, error = self.__normaliseLFNs(lfns)
        if error is not None:
            return error

        if not isinstance(maxSizePerJob, int):
            try:
                maxSizePerJob = int(maxSizePerJob)
            except (TypeError, ValueError) as x:
                return self._errorReport(str(x), 'Expected integer for maxSizePerJob')
        maxSizePerJob *= 1000 * 1000 * 1000  # GB -> bytes

        replicaDict = self.getReplicas(lfns)
        if not replicaDict['OK']:
            return replicaDict
        replicas = replicaDict['Value']['Successful']
        if not replicas:
            failed = replicaDict['Value']['Failed']
            firstFailure = next(iter(failed.items()), 'No replicas returned')
            return self._errorReport(firstFailure, 'Failed to get replica information')

        # Group the LFNs by the set of sites that can access one of their replicas
        sitesForSE = {}
        siteLfns = {}
        for lfn, reps in replicas.items():
            possibleSites = set()
            for se in reps:
                # Look each SE up only once
                if se not in sitesForSE:
                    sitesForSE[se] = getSitesForSE(se).get('Value', [])
                possibleSites.update(sitesForSE[se])
            siteLfns.setdefault(','.join(sorted(possibleSites)), []).append(lfn)

        if '' in siteLfns:
            # Some files don't have active replicas
            return self._errorReport('No active replica found for', str(siteLfns['']))

        # Get size of files
        metadataDict = self.getLfnMetadata(lfns, printOutput)
        if not metadataDict['OK']:
            return metadataDict
        successful = metadataDict['Value']['Successful']
        # A file of unknown size is treated as filling a whole job
        fileSizes = dict((lfn, successful.get(lfn, {}).get('Size', maxSizePerJob)) for lfn in lfns)

        lfnGroups = []
        for files in siteLfns.values():
            # Sort in decreasing size, then fill one group per pass
            pending = sorted(files, key=lambda lfn: fileSizes[lfn], reverse=True)
            while pending:
                group = []
                sizeTot = 0
                leftOver = []
                for lfn in pending:
                    size = fileSizes[lfn]
                    if size >= maxSizePerJob:
                        # Too big to share a job: gets a group of its own
                        lfnGroups.append([lfn])
                    elif sizeTot + size < maxSizePerJob:
                        sizeTot += size
                        group.append(lfn)
                    else:
                        # Does not fit in this group: retry in the next pass.
                        # The first file of a pass always fits, so every pass
                        # places at least one file and the loop terminates.
                        leftOver.append(lfn)
                if group:
                    lfnGroups.append(group)
                pending = leftOver

        if printOutput:
            self.log.notice(self.pPrint.pformat(lfnGroups))
        return S_OK(lfnGroups)

    #############################################################################
    def getAccessURL(self, lfn, storageElement, protocol=None, printOutput=False):
        """Allows to retrieve an access URL for an LFN replica given a valid DIRAC SE
        name. Contacts the file catalog and contacts the site SRM endpoint behind
        the scenes.

        Example Usage:

        >>> print dirac.getAccessURL('/lhcb/data/CCRC08/DST/00000151/0000/00000151_00004848_2.dst','CERN-RAW')
        {'OK': True, 'Value': {'Successful': {'srm://...': {'SRM2': 'rfio://...'}}, 'Failed': {}}}

        :param lfn: Logical File Name (LFN)
        :type lfn: str or python:list
        :param storageElement: DIRAC SE name e.g. CERN-RAW
        :type storageElement: string
        :param protocol: optional protocol, forwarded to the module-level getAccessURL()
        :param printOutput: Optional flag to print result
        :type printOutput: boolean
        :returns: S_OK,S_ERROR
        """
        ret = self._checkFileArgument(lfn, 'LFN')
        if not ret['OK']:
            return ret
        lfn = ret['Value']
        if isinstance(lfn, basestring):
            lfn = [lfn]

        # This resolves to the module-level getAccessURL function, not this method
        results = getAccessURL(lfn, storageElement, protocol=protocol)
        if printOutput:
            printDMResult(results, empty="File not at SE", script="dirac-dms-lfn-accessURL")
        return results

    #############################################################################
    def _getLocalInputData(self, parameters):
        """ LHCb extension of DIRAC API's _getLocalInputData. Only used for
        handling ancestors.

        @param parameters: job parameters; 'InputData' (list or ';' separated
                           string) and 'AncestorDepth' are read
        @type parameters: dictionary
        @return: S_OK(input LFNs followed by their ancestors) or S_ERROR
        """
        inputData = parameters.get('InputData')
        if inputData:
            self.log.debug("DiracLHCb._getLocalInputData. InputData: %s" % inputData)
            if isinstance(inputData, basestring):
                inputData = inputData.split(';')
            # str.strip('LFN:') would strip any of the characters L, F, N and ':'
            # from BOTH ends of the name and could truncate it; remove the
            # marker itself, as done everywhere else in this class.
            inputData = [lfn.replace('LFN:', '') for lfn in inputData]

            ancestorsDepth = int(parameters.get('AncestorDepth', 0))
            if ancestorsDepth:
                self.log.debug("DiracLHCb._getLocalInputData. ancestorsDepth: %d" % ancestorsDepth)
                res = self._bkClient.getFileAncestors(inputData, ancestorsDepth)
                if not res['OK']:
                    self.log.error("Can't get ancestors", res['Message'])
                    return res

                ancestorsLFNs = []
                for ancestorsLFN in res['Value']['Successful'].values():
                    ancestorsLFNs += [i['FileName'] for i in ancestorsLFN]
                self.log.info("DiracLHCb._getLocalInputData: adding %d ancestors" % len(ancestorsLFNs))
                self.log.verbose("%s", ', '.join(ancestorsLFNs))
                inputData += ancestorsLFNs

        return S_OK(inputData)