def main(argv):
    client = AMIClient()
    if not os.path.exists(AMI_CONFIG):
        create_auth_config()
    client.read_config(AMI_CONFIG)
    datasetNamePattern = argv[0]
    res = get_datasets(client, datasetNamePattern, fields='events,nfiles',
                       flatten=True)
    print_table(res)
class McStatsLookup(object):
    """
    Tool to look up stats in mc datasets that exist in both atlfast
    and fullsim.
    """
    def __init__(self, p_tag, origin='mc12_8TeV', backup_ptag=None):
        self.p_tag = p_tag
        self.backup_ptag = backup_ptag
        self.origin = origin
        self.ntup_filter = 'NTUP_SUSY'
        self._setup_ami_client()
        self.outstream = sys.stdout
        self.bugstream = sys.stderr
        self.atlfinder = re.compile('(_a([0-9])+)+')

    def _setup_ami_client(self):
        self.client = AMIClient()
        if not os.path.exists(AMI_CONFIG):
            create_auth_config()
        self.client.read_config(AMI_CONFIG)

    def get_atlfast_fullsim(self, ds_id, stream=None):
        args = {'dataset_number': str(ds_id)}
        match_sets = query.get_datasets(self.client, '%', **args)
        if not match_sets:
            raise DatasetMatchError('found nothing with {}'.format(
                    args.items()), match_sets)
        match_sets = _filter_by_ldn(
            match_sets, stream, self.ntup_filter, self.p_tag)
        atlfast_ds = None
        fullsim_ds = None
        for m in match_sets:
            ldn = _ldn(m)
            if self.atlfinder.search(ldn):
                if atlfast_ds:
                    raise DatasetMatchError('at least two atlfast',
                                            [atlfast_ds, m])
                atlfast_ds = m
            else:
                fullsim_ds = _largest_fullsim_filter(fullsim_ds, m)[0]
        atlfast_counts = _get_expected_counts(self.client, atlfast_ds)
        fullsim_counts = _get_expected_counts(self.client, fullsim_ds)
        atl = (_ldn(atlfast_ds), atlfast_counts) if atlfast_ds else None
        ful = (_ldn(fullsim_ds), fullsim_counts) if fullsim_ds else None
        return atl, ful
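# Hedged usage sketch for McStatsLookup above, assuming the helper module
# `query` and a valid AMI config are available; the p-tag and dataset id
# are placeholders, not values from the source.
lookup = McStatsLookup(p_tag='p1328')
atlfast, fullsim = lookup.get_atlfast_fullsim(164303)
for label, entry in (('atlfast', atlfast), ('fullsim', fullsim)):
    if entry:
        ldn, counts = entry
        print '{}: {} ({} expected events)'.format(label, ldn, counts)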
def main(argv):
    arguments = []
    arguments.append("SearchQuery")
    arguments.append(
        "glite=select logicalDatasetName, totalEvents where "
        "(amiStatus='VALID' AND dataset.prodsysStatus='ALL EVENTS AVAILABLE' "
        "AND dataset.dataType='AOD' AND dataset.logicalDatasetName LIKE "
        "'data12%merge%' and totalEvents > 6000000)")
    arguments.append("project=Atlas_Production")
    arguments.append("processingStep=Atlas_Production")
    arguments.append("entity=dataset")
    amiClient = AMIClient()
    try:
        result = amiClient.execute(arguments)
        # output in csv, just for fun
        print result.output('csv')
    except Exception, msg:
        error = str(msg)
        print error
def GetAMIClient(useReplica=False):
    # This version uses AMI4
    from pyAMI.exceptions import AMI_Error
    from pyAMI.client import AMIClient
    if useReplica:
        print "INFO: Using CERN AMI replica"
        from pyAMI import endpoint
        endpoint.TYPE = "replica"
    else:
        print "INFO: Using primary AMI"
    amiclient = AMIClient()
    return amiclient
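# Minimal usage sketch for GetAMIClient above: ask for the CERN replica
# endpoint instead of the primary server (call taken directly from the
# function's own signature).
amiclient = GetAMIClient(useReplica=True)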
class AmiAugmenter(object):
    """
    Class to wrap ami augmentation.
    """
    def __init__(self, p_tag, origin='mc12_8TeV', backup_ptag=None):
        self.p_tag = p_tag
        self.backup_ptag = backup_ptag
        self.origin = origin
        self.ntup_filter = 'NTUP_SUSY'
        self._setup_ami_client()
        self.outstream = sys.stdout
        self.bugstream = sys.stderr

    def _setup_ami_client(self):
        self.client = AMIClient()
        if not os.path.exists(AMI_CONFIG):
            create_auth_config()
        self.client.read_config(AMI_CONFIG)

    def get_dataset_range(self, ds_range, physics_type=None):
        ds_dict = {}
        for num in ds_range:
            self.outstream.write('looking up {}, category {}\n'.format(
                    num, physics_type))
            for ds in self.ds_from_id(num):
                if physics_type:
                    ds.physics_type = physics_type
                if ds.key in ds_dict:
                    old = ds_dict[ds.key]
                    largest_ds = _filter_for_largest_tags(ds, old)
                    if len(largest_ds) == 1:
                        ds_dict[ds.key] = largest_ds[0]
                    else:
                        raise DatasetOverwriteError(
                            "tried to overwrite {},\n{} with \n{}".format(
                                ds.key, ds.full_name,
                                ds_dict[ds.key].full_name))
                else:
                    ds_dict[ds.key] = ds
        return ds_dict

    def ds_from_id(self, ds_id, stream=None):
        if stream and stream.startswith('physics'):
            args = {'run': str(ds_id)}
        else:
            args = {'dataset_number': str(ds_id)}
        qstr = '%'.join([self.origin, str(ds_id), stream or ''])
        match_sets = query.get_datasets(self.client, qstr, **args)
        if not match_sets:
            raise DatasetMatchError('found nothing with {}'.format(qstr),
                                    match_sets)
        match_sets = _filter_by_ldn(
            match_sets, stream, self.ntup_filter, self.p_tag)
        for match in match_sets:
            ldn = _ldn(match)
            yield self.ds_from_ldn(ldn)

    def ds_from_ldn(self, ldn):
        info = query.get_dataset_info(self.client, ldn)
        ds = meta.Dataset(ldn)
        self._write_ami_info(ds, info)
        if not ds.is_data:
            self._write_mc_ami_info(ds, info)
        return ds

    def get_datasets_year(self, year=12, stream=None):
        datasets = {}
        periods = query.get_periods(self.client, year=year)
        hep_periods = []
        for period in periods:
            if period.project.endswith('TeV'):
                hep_periods.append(period.name)
        runs = query.get_runs(self.client, hep_periods, year=year)
        n_runs = len(runs)
        for run_n, run in enumerate(runs):
            self.outstream.write(
                'looking for {} of {} {}...'.format(run_n, n_runs, run))
            try:
                ds = next(iter(self.ds_from_id(run, stream)))
            except DatasetMatchError as err:
                if err.matches:
                    self.outstream.write('none in stream\n')
                else:
                    self.outstream.write('nothing\n')
                self.bugstream.write(str(err) + '\n')
                continue
            self.outstream.write('found: {}\n'.format(ds.full_name))
            datasets[ds.key] = ds
        return datasets

    def _bug_report_line(self, line, ds, info):
        diagnostic = 'for {} {}. In info: {}\n'.format(
            ds.key, ds.name, ', '.join(info.extra.keys()))
        responsible = ''
        argv = ["SearchQuery"]
        argv.append(
            "-sql=select physicistResponsible from dataset where "
            "logicalDatasetName='{}'".format(info.info['logicalDatasetName']))
        argv.append('project=mc12_001')
        argv.append('processingStep=production')
        result = self.client.execute(argv)
        for row in result.rows():
            if 'physicistResponsible' in row:
                assert not responsible, 'two responsible physicists found'
                responsible = row['physicistResponsible']
        tmp_str = '{} (email: {res}) {}' if responsible else '{} {}'
        return tmp_str.format(line, diagnostic, res=responsible)

    def _campaign(self, ldn):
        # argv = ['ListDatasetProvenance']
        argv = ["ListCampaignForDataset"]
        argv.append("logicalDatasetName={}".format(ldn))
        try:
            result = self.client.execute(argv)
            for row in result.rows():
                print 'found campaign: {}!'.format(row)
        except Exception:
            print 'fail'

    def _write_mc_ami_info(self, ds, info, overwrite=False):
        self._campaign(ds.full_name)
        if not ds.filteff or overwrite:
            filteff_list = ['GenFiltEff_mean', 'approx_GenFiltEff']
            for name in filteff_list:
                if name in info.extra:
                    ds.filteff = float(info.extra[name])
                    break
            if not ds.filteff:
                self.bugstream.write(
                    self._bug_report_line("can't find filteff", ds, info))
        new_xsec = 0.0
        xsec_list = ['crossSection_mean', 'approx_crossSection']
        for name in xsec_list:
            if name in info.extra:
                # ami stores cross sections in nb
                new_xsec = float(info.extra[name]) * 1e6
                break
        if not new_xsec:
            self.bugstream.write(
                self._bug_report_line("can't find xsection", ds, info))
            return
        if not ds.total_xsec_fb or overwrite:
            ds.total_xsec_fb = new_xsec
        else:
            diff = ds.total_xsec_fb - new_xsec
            rel_dif = abs(diff / ds.total_xsec_fb)
            if rel_dif > 0.1:
                warn('for sample {id} {name}: '
                     'ami gives xsec of {ami} fb, '
                     'susytools gives {st} (diff {diff:.1%})'.format(
                         id=ds.id, name=ds.name, ami=new_xsec,
                         st=ds.total_xsec_fb, diff=rel_dif))

    def _write_ami_info(self, ds, info):
        ds.n_expected_entries = int(info.info['totalEvents'])
        ds.meta_sources.add('ami')
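# Hedged usage sketch for AmiAugmenter above, assuming the same helper
# modules (`query`, `meta`, etc.); the dataset-id range and physics type
# are placeholders, not values from the source.
augmenter = AmiAugmenter(p_tag='p1328')
ds_dict = augmenter.get_dataset_range(range(164303, 164310),
                                      physics_type='signal')
for key, ds in ds_dict.iteritems():
    print '{} -> {} expected events'.format(key, ds.n_expected_entries)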
    'merge': (4540,)}}

HERE = os.path.dirname(os.path.abspath(__file__))

# Any datasets which don't have the provenance stored properly in AMI
# should be hardcoded here (it happens)
DS_NOPROV = {}

# Cross-sections are cached so that we don't need to keep asking AMI
# for them over and over
XSEC_CACHE_FILE = os.path.join(HERE, 'xsec', 'cache.pickle')
XSEC_CACHE_MODIFIED = False
XSEC_CACHE = {}

if USE_PYAMI:
    amiclient = AMIClient()
    if not os.path.exists(AMI_CONFIG):
        create_auth_config()
    amiclient.read_config(AMI_CONFIG)


class NoMatchingDatasetsFound(Exception):
    pass


class Database(dict):

    @classmethod
    def match_to_ds(cls, match):
        """
        Construct the original NTUP dataset name from a skim match object
        """
def convertGoodRunListXMLtoDS(tmpLog, goodRunListXML, goodRunDataType='',
                              goodRunProdStep='', goodRunListDS='',
                              verbose=False):
    tmpLog.info('trying to convert GoodRunListXML to a list of datasets')
    # return value for failure
    failedRet = False, '', []
    # import pyAMI
    try:
        from pyAMI.client import AMIClient
    except:
        errType, errValue = sys.exc_info()[:2]
        print "%s %s" % (errType, errValue)
        tmpLog.error('cannot import pyAMI module')
        return failedRet
    # read XML
    try:
        gl_xml = open(goodRunListXML)
    except:
        tmpLog.error('cannot open %s' % goodRunListXML)
        return failedRet
    # parse XML to get run/lumi
    runLumiMap = {}
    import xml.dom.minidom
    rootDOM = xml.dom.minidom.parse(goodRunListXML)
    for tmpLumiBlock in rootDOM.getElementsByTagName('LumiBlockCollection'):
        for tmpRunNode in tmpLumiBlock.getElementsByTagName('Run'):
            tmpRunNum = long(tmpRunNode.firstChild.data)
            for tmpLBRange in tmpLumiBlock.getElementsByTagName('LBRange'):
                tmpLBStart = long(tmpLBRange.getAttribute('Start'))
                tmpLBEnd = long(tmpLBRange.getAttribute('End'))
                # append
                if not runLumiMap.has_key(tmpRunNum):
                    runLumiMap[tmpRunNum] = []
                runLumiMap[tmpRunNum].append((tmpLBStart, tmpLBEnd))
    # make arguments
    amiArgv = []
    amiArgv.append("GetGoodDatasetList")
    amiArgv.append("goodRunList=" + gl_xml.read())
    gl_xml.close()
    if goodRunDataType != '':
        amiArgv.append('dataType=%s' % goodRunDataType)
    if goodRunProdStep != '':
        amiArgv.append('prodStep=%s' % goodRunProdStep)
    if verbose:
        tmpLog.debug(amiArgv)
    # convert wildcards for regex matching
    goodRunListDS = goodRunListDS.replace('*', '.*')
    # list of dataset name patterns
    if goodRunListDS == '':
        goodRunListDS = []
    else:
        goodRunListDS = goodRunListDS.split(',')
    # execute
    try:
        amiclient = AMIClient()
        amiOut = amiclient.execute(amiArgv)
    except:
        errType, errValue = sys.exc_info()[:2]
        tmpLog.error("%s %s" % (errType, errValue))
        tmpLog.error('pyAMI failed')
        return failedRet
    # get dataset map
    amiOutDict = amiOut.to_dict()
    if verbose:
        tmpLog.debug(amiOutDict)
    if not amiOutDict.has_key('goodDatasetList'):
        tmpLog.error("output from pyAMI doesn't contain goodDatasetList")
        return failedRet
    amiDsDict = amiOutDict['goodDatasetList']
    # parse
    datasetMapFromAMI = {}
    dq2 = DQ2()
    for tmpKey, tmpVal in amiDsDict.iteritems():
        if tmpVal.has_key('logicalDatasetName'):
            dsName = str(tmpVal['logicalDatasetName'])
            runNumber = long(tmpVal['runNumber'])
            # check dataset names
            if goodRunListDS == []:
                matchFlag = True
            else:
                matchFlag = False
                for tmpPatt in goodRunListDS:
                    if re.search(tmpPatt, dsName) != None:
                        matchFlag = True
            if not matchFlag:
                continue
            # check with DQ2 since AMI doesn't store the trailing /
            dsmap = {}
            try:
                tmpLog.debug("getting the list of files from DDM for %s" % dsName)
                dsmap = dq2.listDatasets(dsName, onlyNames=True)
            except:
                pass
            if not dsmap.has_key(dsName):
                dsName += '/'
            # check duplication for the run number
            newFlag = True
            if datasetMapFromAMI.has_key(runNumber):
                # check configuration tag to use the newer one
                newConfigTag = checkConfigTag(datasetMapFromAMI[runNumber], dsName)
                if newConfigTag == True:
                    del datasetMapFromAMI[runNumber]
                elif newConfigTag == False:
                    # keep the existing one
                    newFlag = False
            # append
            if newFlag:
                if not datasetMapFromAMI.has_key(runNumber):
                    datasetMapFromAMI[runNumber] = []
                datasetMapFromAMI[runNumber].append(dsName)
    # make string
    amiRunNumList = datasetMapFromAMI.keys()
    amiRunNumList.sort()
    datasets = ''
    filesStr = []
    for tmpRunNum in amiRunNumList:
        datasetListFromAMI = datasetMapFromAMI[tmpRunNum]
        for dsName in datasetListFromAMI:
            datasets += '%s,' % dsName
            # get files in the dataset
            tmpFilesStr = []
            tmpFileGUIDMap, tmpStamp = dq2.listFilesInDataset(dsName)
            tmpFileMap = convertGuidToLfnMap(tmpFileGUIDMap)
            tmpLFNList = tmpFileMap.keys()
            tmpLFNList.sort()
            for tmpLFN in tmpLFNList:
                # extract LBs
                tmpItems = tmpLFN.split('.')
                # short format
                if len(tmpItems) < 7:
                    tmpFilesStr.append(tmpLFN)
                    continue
                tmpLBmatch = re.search('_lb(\d+)-lb(\d+)', tmpLFN)
                if tmpLBmatch != None:
                    LBstart_LFN = long(tmpLBmatch.group(1))
                    LBend_LFN = long(tmpLBmatch.group(2))
                else:
                    # _lbXXX-lbYYY not found, try ._lbXYZ.
                    tmpLBmatch = re.search('\._lb(\d+)\.', tmpLFN)
                    if tmpLBmatch != None:
                        LBstart_LFN = long(tmpLBmatch.group(1))
                        LBend_LFN = LBstart_LFN
                    else:
                        tmpFilesStr.append(tmpLFN)
                        continue
                # check range
                if not runLumiMap.has_key(tmpRunNum):
                    tmpLog.error('AMI gives a wrong run number (%s) which is not contained in %s' % \
                                 (tmpRunNum, goodRunListXML))
                    return failedRet
                inRange = False
                for LBstartXML, LBendXML in runLumiMap[tmpRunNum]:
                    if (LBstart_LFN >= LBstartXML and LBstart_LFN <= LBendXML) or \
                       (LBend_LFN >= LBstartXML and LBend_LFN <= LBendXML) or \
                       (LBstart_LFN >= LBstartXML and LBend_LFN <= LBendXML) or \
                       (LBstart_LFN <= LBstartXML and LBend_LFN >= LBendXML):
                        inRange = True
                        break
                if inRange:
                    tmpFilesStr.append(tmpLFN)
            # check if files were found
            if not tmpFilesStr:
                tmpLog.error('found no files with corresponding LBs in %s' % dsName)
                return failedRet
            filesStr += tmpFilesStr
    datasets = datasets[:-1]
    if verbose:
        tmpLog.debug('converted to DS:%s LFN:%s' % (datasets, str(filesStr)))
    # return
    return True, datasets, filesStr
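# Hedged usage sketch for convertGoodRunListXMLtoDS above; the logger name
# and GRL XML path are placeholders, not values from the source.
import logging
tmpLog = logging.getLogger('grl2ds')
ok, datasets, filesStr = convertGoodRunListXMLtoDS(
    tmpLog, 'data12_8TeV.periodA_GRL.xml',
    goodRunDataType='AOD', goodRunProdStep='merge', verbose=True)
if ok:
    print 'datasets: %s' % datasets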
# the above line is needed to ensure the correct python environment since pyAMI 4.0.3
from pyAMI import endpoint
from pyAMI.client import AMIClient
from pyAMI.endpoint import get_endpoint, get_XSL_URL
from pyAMI.auth import AMI_CONFIG, create_auth_config
import os

# set up your arguments for your favourite command
argv = []
argv.append("GetUserInfo")
# the following will fail on the replica but succeed on the main server,
# because the replica is case sensitive!
argv.append("amiLogin=ALBRAND")

# to use the replica
endpoint.TYPE = 'replica'
print get_endpoint()
print get_XSL_URL()

amiClient = AMIClient()
# Read the config file of username and password.
# Prompt if it is not there.
if not os.path.exists(AMI_CONFIG):
    create_auth_config()
amiClient.read_config(AMI_CONFIG)

try:
    result = amiClient.execute(argv)
    print "Reading from the CERN replica: " + result.output("xml")
except Exception, msg:
    error = str(msg)
    print error

endpoint.TYPE = 'main'
try:
    result = amiClient.execute(argv)
    print "Reading from the main server: " + result.output("xml")
except Exception, msg:
    error = str(msg)
    print error
def get_tag_diffs(mon, ref, g, mon_task, ref_task):
    client = AMIClient()
    if not os.path.exists(AMI_CONFIG):
        create_auth_config()
    client.read_config(AMI_CONFIG)

    mon_release = ''
    ref_release = ''
    mon_taginfo = get_configtags(client, mon.rtag)
    ref_taginfo = get_configtags(client, ref.rtag)
    configtags = ['SWReleaseCache', 'lvl1ps', 'hltps', 'smk',
                  'enlvl1prescales']
    configcomp = {}
    for n, info in enumerate(mon_taginfo):
        ref_info = ref_taginfo[n]
        for xinfo in info.keys():
            if xinfo in configtags:
                if xinfo == 'SWReleaseCache':
                    mon_rel = info[xinfo]
                configcomp[xinfo] = [info[xinfo], ref_info[xinfo]]
    for info in ref_taginfo:
        for xinfo in info.keys():
            if xinfo == 'SWReleaseCache':
                ref_rel = info[xinfo]
    mon_release = mon_rel.replace('_', ',')
    ref_release = ref_rel.replace('_', ',')

    import PyCmt.Cmt as Cmt
    diffs = Cmt.get_tag_diff(ref=ref_release, chk=mon_release, verbose=False)

    ami_link = ('<a href ="https://ami.in2p3.fr/AMI/servlet/'
                'net.hep.atlas.Database.Bookkeeping.AMI.Servlet.Command?'
                'Converter=/AMIXmlToAMIProdHtml.xsl&Command='
                'FormListConfigurationTag+-configTag=%s">%s</a></td>')
    sav_link = '<a href="https://savannah.cern.ch/task/?%s"> Task #%s </a></td>'

    g.write('<table>\n')
    g.write('<tr><td width="250"></td><td width="250"><b>Reprocessing</b></td>'
            '<td width="250"><b>Reference</b></td></tr>')
    g.write('<tr><td>AMI Tag </td>')
    for tag in (mon.rtag, ref.rtag):
        g.write('<td>' + ami_link % (tag, tag))
    g.write('</tr>')
    g.write('<tr><td> Savannah Task </td>')
    for task in (mon_task, ref_task):
        if task is None:
            g.write('<td><a href="https://savannah.cern.ch/task/index.php?'
                    'go_report=Apply&group=atlas-trig&func=browse&'
                    'category_id=107&status_id=0"> Search Tasks </a></td>')
        else:
            g.write('<td>' + sav_link % (task, task))
    g.write('</tr>\n')
    g.write('<tr><td> Run </td>')
    for run in (mon.run, ref.run):
        g.write('<td> %s </td>' % str(run))
    g.write('</tr><tr></tr>\n')
    g.write('<tr><td><b>Tag Configuration </b></td></tr>\n')
    for field in configtags:
        g.write('<tr><td>%s</td><td>%s</td><td>%s</td>' % (
            field, configcomp[field][0], configcomp[field][1]))
        g.write('</tr>\n')
    g.write('<tr></tr>')
    g.write('</table>')
    g.write('<h3> Release Tag Differences </h3>')
    g.write('<p> Found [%i] differences </p>\n' % len(diffs))
    if len(diffs):
        g.write('<table>\n')
        g.write('<tr><td width = "150"><b>Reprocessing</b></td>'
                '<td width="250"><b>mon-project</b></td>')
        g.write('<td width = "150"><b>Reference</b></td>'
                '<td width="250"><b>ref-project</b></td>')
        g.write('<td width = "500"><b>package name</b></td></tr>')
        for diff in diffs:
            g.write('<tr>')
            g.write('<td> %s </td>' % diff['chk'])
            g.write('<td> %s </td>' % diff['chk_proj'])
            g.write('<td> %s </td>' % diff['ref'])
            g.write('<td> %s </td>' % diff['ref_proj'])
            g.write('<td> %s </td>' % diff['full_name'])
            g.write('</tr>\n')
        g.write('</table>')
    return 0
# print ("Will manually add ZSI and 4suite, then try again...") # import sys # sys.path.insert(0,'/afs/cern.ch/atlas/offline/external/ZSI/2.1-a1/lib/python') # sys.path.insert(0,'/afs/cern.ch/sw/lcg/external/4suite/1.0.2_python2.5/slc4_ia32_gcc34/lib/python2.5/site-packages') # from pyAMI.pyAMI import AMI # print ("import pyAMI was succesful") # amiclient=AMI(False) # import setup_pyAMI # from pyAMI.pyAMI import AMI try: from pyAMI.client import AMIClient except ImportError: print("WARNING unable to import AMIClient from pyAMI") print("import pyAMI4 was successful") amiclient = AMIClient(False) #------------------------ #Build final AMI tag info s = {} #stable values s['configTag'] = amiTag s['AMIUser'] = login s['AMIPass'] = password c = {} #changing values c['phconfig'] = str(processConfigs[process]['phconfig'].__str__()) c['inputs'] = str(processConfigs[process]['inputs'].__str__()) c['outputs'] = str(processConfigs[process]['outputs'].__str__()) if humanReadableOutputs: c['Human-readable outputs'] = str(humanReadableOutputs) c['SWReleaseCache'] = str(release)
# dataType='AOD' and version like '%r3542' and datasetNumber=146932" \
# -project=mc12_001 -processingStep=production
argv = []
argv.append("SearchQuery")
# SQL syntax - goes to a specific catalogue
argv.append(
    "-sql=select logicalDatasetName from dataset where "
    "dataType='AOD' and version like '%r3542' and datasetNumber=146932")
# Tell AMI in which catalogue you want to look. (Or use the gLite syntax.)
argv.append('project=mc12_001')
argv.append('processingStep=production')

amiClient = AMIClient()
try:
    result = amiClient.execute(argv)
    # Change the output format to csv.
    print result.output('csv')
except Exception, msg:
    error = str(msg)
    print error

argv = []
argv.append("SearchQuery")
# gLite syntax - searches over ALL catalogues - can be slower
argv.append(