def dbs_dataset4site_release(dbs_url, release):
    "Get dataset for given site and release"
    expire = 600 # set some expire since we're not going to use it
    if which_dbs(dbs_url) == 'dbs':
        # DBS2 branch uses the QL executeQuery API; DBS3 (below) uses the
        # datasets API and passes the release over there
        query = 'find dataset where release=%s' % release
        dbs_args = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
                    'query': query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, headers, expire,
                                 ckey=CKEY, cert=CERT, system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if 'dataset' in row:
                dataset = row['dataset']['dataset']
                yield dataset
            elif 'error' in row:
                err = row.get('reason', None)
                err = err if err else row['error']
                yield 'DBS error: %s' % err
    else:
        # DBS3: call datasets?release_version=release to get list of datasets
        dbs_url += '/datasets'
        dbs_args = {'release_version': release,
                    'dataset_access_type': 'VALID'}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, headers, expire,
                                 ckey=CKEY, cert=CERT, system='dbs3')
        for rec in json_parser(source, None):
            for row in rec:
                yield row['dataset']

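# A minimal usage sketch for dbs_dataset4site_release (the DBS URL and release
# below are illustrative assumptions; CKEY/CERT must point to a valid grid
# key/cert pair for the call to succeed):
#
#     dbs_url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     for dataset in dbs_dataset4site_release(dbs_url, 'CMSSW_7_1_1'):
#         print(dataset)
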
def getdata(self, url, params, expire, headers=None, post=None):
    """URL call wrapper"""
    if url.find('https:') != -1:
        return getdata(url, params, headers, expire, post,
                       self.error_expire, self.verbose,
                       self.ckey, self.cert, system=self.name)
    else:
        return getdata(url, params, headers, expire, post,
                       self.error_expire, self.verbose, system=self.name)

def datasets_dbs2(urls, verbose=0):
    """DBS2 implementation of datasets function"""
    headers = {'Accept': 'application/xml;text/xml'}
    url = urls.get('dbs')
    query = 'find dataset,dataset.tier,dataset.era where dataset.status like VALID*'
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', 'query': query}
    stream, _ = getdata(url, params, headers, verbose=verbose)
    records = [r for r in qlxml_parser(stream, 'dataset')]
    stream.close()
    data = {}
    size = 10 # size for POST request to Phedex
    for row in records:
        dataset = row['dataset']
        if dataset['dataset'] not in data:
            data[dataset['dataset']] = \
                dict(era=dataset['dataset.era'], tier=dataset['dataset.tier'])
        if len(data) > size:
            for rec in dataset_info(urls, data):
                yield rec
            data = {}
    if data:
        for rec in dataset_info(urls, data):
            yield rec
    del records

def phedex_files(phedex_url, kwds):
    "Get file information from Phedex"
    params = dict(kwds) # parameters to be sent to Phedex
    site = kwds.get('site', None)
    if site and phedex_node_pattern.match(site):
        if not site.endswith('*'):
            # this allows look-up of site names w/o _Buffer or _MSS suffix
            site += '*'
        params.update({'node': site})
        params.pop('site')
    elif site and se_pattern.match(site):
        params.update({'se': site})
        params.pop('site')
    else:
        return
    expire = 600 # set some expire since we're not going to use it
    headers = {'Accept': 'text/xml'}
    source, expire = getdata(phedex_url, params, headers, expire,
                             ckey=CKEY, cert=CERT, system='phedex')
    tags = 'block.file.name'
    prim_key = 'block'
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        files = ddict.get('block.file')
        if not isinstance(files, list):
            files = [files]
        for row in files:
            yield row['name']

def findReqMgrIds(dataset, base='https://cmsweb.cern.ch', verbose=False):
    """
    Find ReqMgr ids for a given dataset. This is a quite complex procedure in
    CMS. We need to query the ReqMgr data-service cache and find workflow ids
    by output dataset name. The ReqMgr returns either a document with ids used
    by MCM (i.e. ProcConfigCacheID, ConfigCacheID, SkimConfigCacheID) or we
    can take the id of the request which bypasses MCM. For references see
    these discussions:
    https://github.com/dmwm/DAS/issues/4045
    https://hypernews.cern.ch/HyperNews/CMS/get/dmDevelopment/1501/1/1/1/1.html
    """
    params = {'key': '"%s"' % dataset, 'include_docs': 'true'}
    url = "%s/couchdb/reqmgr_workload_cache/_design/ReqMgr/_view/byoutputdataset" \
        % base
    headers = {'Accept': 'application/json;text/json'}
    expire = 600 # dummy number, we don't need it here
    source, expire = getdata(url, params, headers, expire,
                             ckey=CKEY, cert=CERT, verbose=verbose)
    ids = []
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            if 'ProcConfigCacheID' in doc:
                ids.append(doc['ProcConfigCacheID'])
            elif 'ConfigCacheID' in doc:
                ids.append(doc['ConfigCacheID'])
            elif 'SkimConfigCacheID' in doc:
                ids.append(doc['SkimConfigCacheID'])
            else:
                if 'id' in rec and 'key' in rec and rec['key'] == dataset:
                    ids.append(rec['id'])
    return ids

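# A minimal usage sketch for findReqMgrIds (the dataset name is a placeholder
# in the generic /Primary/Processed/Tier CMS format, not a real dataset; a
# valid CKEY/CERT pair is required to reach cmsweb):
#
#     ids = findReqMgrIds('/Primary/Processed-v1/TIER')
#     print(ids)
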
def getdata(self, url, params, expire, headers=None, post=None):
    """URL call wrapper"""
    if not headers:
        headers = {"Accept": "application/json"} # MCM uses rest API
    if "dataset" in params:
        url = "%s%s" % (url, params.get("dataset"))
    elif "mcm" in params:
        url = "%s/%s" % (url, params.get("mcm"))
    else:
        return {}
    params = {}
    result = getdata(url, params, headers, expire, post,
                     self.error_expire, self.verbose,
                     self.ckey, self.cert, doseq=False, system=self.name)
    return result

def getdata(self, url, params, expire, headers=None, post=None):
    """URL call wrapper"""
    if not headers:
        headers = {'Accept': 'application/json'} # MCM uses rest API
    if 'dataset' in params:
        url = '%s%s' % (url, params.get('dataset'))
    elif 'mcm' in params:
        url = '%s/%s' % (url, params.get('mcm'))
    elif 'prepid' in params:
        url = '%s/%s' % (url, params.get('prepid'))
    else:
        return {}
    params = {}
    result = getdata(url, params, headers, expire, post,
                     self.error_expire, self.verbose,
                     self.ckey, self.cert, doseq=False, system=self.name)
    return result

def dataset_info(urls, datasetdict, verbose=0):
    """
    Request blockReplicas information from Phedex for a given dataset or a
    list of datasets (use POST request in the latter case). Update MongoDB
    with aggregated information about dataset: site, size, nfiles, nblocks.
    """
    url = urls.get('phedex') + '/blockReplicas'
    params = {'dataset': [d for d in datasetdict.keys()]}
    headers = {'Accept': 'application/json;text/json'}
    data, _ = getdata(url, params, headers, post=True,
                      ckey=CKEY, cert=CERT, verbose=verbose,
                      system='dbs_phedex')
    if isinstance(data, basestring): # no response
        dastimestamp('DBS_PHEDEX ERROR: %s' % data)
        return
    jsondict = json.load(data)
    data.close()
    for row in jsondict['phedex']['block']:
        dataset = row['name'].split('#')[0]
        for rep in row['replica']:
            rec = dict(dataset=dataset, nfiles=row['files'],
                       size=row['bytes'], site=rep['node'],
                       se=rep['se'], custodial=rep['custodial'])
            rec.update(datasetdict[dataset])
            yield rec

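# A minimal usage sketch for dataset_info (the Phedex URL, dataset name, era
# and tier values are illustrative assumptions):
#
#     urls = {'phedex': 'https://cmsweb.cern.ch/phedex/datasvc/json/prod'}
#     ddict = {'/Primary/Processed-v1/TIER': dict(era='EraX', tier='TIER')}
#     for rec in dataset_info(urls, ddict):
#         print(rec['site'], rec['size'], rec['nfiles'])
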
def dbs_find(entity, url, kwds):
    "Find DBS2 entities (run/file/block) for given set of parameters"
    if entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire = 600
    dataset = kwds.get('dataset', None)
    block = kwds.get('block', None)
    lfn = kwds.get('lfn', None)
    runs = kwds.get('runs', [])
    if not (dataset or block or lfn):
        return
    query = 'find %s' % entity
    if dataset:
        query += ' where dataset=%s' % dataset
    elif block:
        query += ' where block=%s' % block
    elif lfn:
        query += ' where file=%s' % lfn
    if runs:
        rcond = ' or '.join(['run=%s' % r for r in runs])
        query += ' and (%s)' % rcond
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', 'query': query}
    headers = {'Accept': 'text/xml'}
    source, expire = getdata(url, params, headers, expire,
                             ckey=CKEY, cert=CERT)
    pkey = entity
    for row in qlxml_parser(source, pkey):
        val = row[entity][entity]
        yield val

def getdata_helper(self, url, params, expire, headers=None, post=None):
    "Helper function to get data from SiteDB or local cache"
    cname = url.split('/')[-1].replace('-', '_')
    conn = db_connection(self.dburi)
    col = conn[self.name][cname]
    local = find_one(col, {'expire': {'$gt': expire_timestamp(time.time())}})
    data = None
    if local:
        msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
        self.logger.info(msg)
        try: # get data from local cache
            data = [r for r in col.find() if 'expire' not in r][0]
            del data['_id']
        except Exception as exc:
            print_exc(exc)
            data = {}
    if not data or not local:
        headers = {'Accept': 'application/json'}
        datastream, expire = getdata(url, params, headers, expire, post,
                                     self.error_expire, self.verbose,
                                     self.ckey, self.cert, system=self.name)
        try: # read data and write it to local cache
            data = json.load(datastream)
            datastream.close()
            col.remove()
            col.insert(data)
            col.insert({'expire': expire_timestamp(expire)})
        except Exception as exc:
            print_exc(exc)
    return data, expire

def getdata(self, url, params, expire, headers=None, post=None):
    """URL call wrapper"""
    if url[-1] == '/':
        url = url[:-1]
    return getdata(url, params, headers, expire, post,
                   self.error_expire, self.verbose,
                   self.ckey, self.cert, system=self.name)

def run_lumis_dbs(url, dataset, ckey, cert):
    "Retrieve list of run/lumis from DBS for a given dataset"
    res = {} # output result
    api_url = url + '/blocks'
    params = {'dataset': dataset}
    data, _ = getdata(api_url, params, ckey=ckey, cert=cert, system='combined')
    for row in json.load(data):
        api_url = url + '/filelumis'
        params = {'block_name': row['block_name']}
        data, _ = getdata(api_url, params, ckey=ckey, cert=cert,
                          system='combined')
        for rec in json.load(data):
            run = rec['run_num']
            lumi = rec['lumi_section_num']
            res.setdefault(run, []).append(lumi)
    return res

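# A minimal usage sketch for run_lumis_dbs (URL and dataset name are
# illustrative assumptions; ckey/cert must point to a valid grid key/cert):
#
#     url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     run_lumis = run_lumis_dbs(url, '/Primary/Processed-v1/TIER', ckey, cert)
#     for run, lumis in run_lumis.items():
#         print(run, len(lumis))
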
def getdata_helper(self, url, params, expire, headers=None, post=None):
    "Helper function to get data from SiteDB or local cache"
    cname = url.split('/')[-1].replace('-', '_')
    col = self.localcache.conn[self.name][cname]
    local = col.find_one({'expire': {'$gt': expire_timestamp(time.time())}})
    data = None
    if local:
        msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
        self.logger.info(msg)
        try: # get data from local cache
            data = [r for r in col.find() if 'expire' not in r][0]
            del data['_id']
        except Exception as exc:
            print_exc(exc)
            data = {}
    if not data or not local:
        headers = {'Accept': 'application/json'}
        datastream, expire = getdata(url, params, headers, expire, post,
                                     self.error_expire, self.verbose,
                                     self.ckey, self.cert, system=self.name)
        try: # read data and write it to local cache
            data = json.load(datastream)
            datastream.close()
            col.remove()
            col.insert(data)
            col.insert({'expire': expire_timestamp(expire)})
        except Exception as exc:
            print_exc(exc)
    return data, expire

def runs_dbs(url, dataset, ckey, cert):
    "Retrieve list of runs from DBS for a given dataset"
    api_url = url + '/runs'
    params = {'dataset': dataset}
    data, _ = getdata(api_url, params, ckey=ckey, cert=cert, system='combined')
    for row in json.load(data):
        run = row['run']['run_num']
        yield run

def runs_dbs2(url, dataset, ckey, cert):
    "Retrieve list of runs from DBS2 for a given dataset"
    query = "find run where dataset=%s" % dataset
    params = dict(api='executeQuery', apiversion='DBS_2_0_9', query=query)
    data, _ = getdata(url, params, ckey=ckey, cert=cert, system='combined')
    prim_key = 'run'
    for row in qlxml_parser(data, prim_key):
        run = row['run']['run']
        yield run

def site4dataset(dbs_url, phedex_api, args, expire):
    "Yield site information about given dataset"
    # DBS part
    dataset = args['dataset']
    try:
        totblocks, totfiles = dataset_summary(dbs_url, dataset)
    except Exception as err:
        error = str(err)
        reason = "Can't find #block, #files info in DBS for dataset=%s" \
            % dataset
        yield {'site': {'error': error, 'reason': reason}}
        return
    # Phedex part
    phedex_args = {'dataset': args['dataset']}
    headers = {'Accept': 'text/xml'}
    source, expire = getdata(phedex_api, phedex_args, headers, expire,
                             post=True, system='phedex')
    prim_key = 'block'
    tags = 'block.replica.node'
    site_info = {}
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        replicas = ddict.get('block.replica')
        if not isinstance(replicas, list):
            replicas = [replicas]
        for row in replicas:
            if not row or 'node' not in row:
                continue
            node = row['node']
            files = int(row['files'])
            complete = 1 if row['complete'] == 'y' else 0
            if node in site_info:
                files = site_info[node]['files'] + files
                nblks = site_info[node]['blocks'] + 1
                bc_val = site_info[node]['blocks_complete']
                b_complete = bc_val + 1 if complete else bc_val
            else:
                b_complete = 1 if complete else 0
                nblks = 1
            site_info[node] = {'files': files, 'blocks': nblks,
                               'blocks_complete': b_complete}
    row = {}
    for key, val in site_info.iteritems():
        if totfiles:
            nfiles = '%5.2f%%' % (100 * float(val['files']) / totfiles)
        else:
            nfiles = 'N/A'
        if totblocks:
            nblks = '%5.2f%%' % (100 * float(val['blocks']) / totblocks)
        else:
            nblks = 'N/A'
        ratio = float(val['blocks_complete']) / val['blocks']
        b_completion = '%5.2f%%' % (100 * ratio)
        row = {'site': {'name': key, 'dataset_fraction': nfiles,
                        'block_fraction': nblks,
                        'block_completion': b_completion}}
        yield row

def getdata(self, url, params, expire, headers=None, post=None):
    """URL call wrapper"""
    if url[-1] == '/':
        url = url[:-1]
    for key, val in params.iteritems():
        url = '/'.join([url, val])
    params = {}
    return getdata(url, params, headers, expire, post,
                   self.error_expire, self.verbose,
                   self.ckey, self.cert, system=self.name)

def getdata(self, url, params, expire, headers=None, post=None):
    """URL call wrapper"""
    if not headers:
        headers = {'Accept': 'application/json'} # DBS3 always needs that
    if url.find('datasetlist') != -1:
        post = True
        headers['Content-type'] = 'application/json'
    return getdata(url, params, headers, expire, post,
                   self.error_expire, self.verbose,
                   self.ckey, self.cert, doseq=False, system=self.name)

def run_lumis_dbs2(url, dataset, ckey, cert):
    "Retrieve list of run/lumis from DBS2 for a given dataset"
    query = "find run, lumi where dataset=%s" % dataset
    params = dict(api='executeQuery', apiversion='DBS_2_0_9', query=query)
    data, _ = getdata(url, params, ckey=ckey, cert=cert, system='combined')
    prim_key = 'run'
    res = {} # output result
    for row in qlxml_parser(data, prim_key):
        run = row['run']['run']
        lumi = row['run']['lumi']
        res.setdefault(run, []).append(lumi)
    return res

def worker_helper(url, query, table='runsummary'):
    """
    Query RunRegistry service, see documentation at
    https://twiki.cern.ch/twiki/bin/viewauth/CMS/DqmRrApi
    url=http://runregistry.web.cern.ch/runregistry/
    """
    workspace = 'GLOBAL'
    template = 'json'
    if table == 'runsummary':
        columns = ['number', 'startTime', 'stopTime', 'triggers',
                   'runClassName', 'runStopReason', 'bfield', 'gtKey',
                   'l1Menu', 'hltKeyDescription', 'lhcFill', 'lhcEnergy',
                   'runCreated', 'modified', 'lsCount', 'lsRanges']
    elif table == 'runlumis':
        columns = ['sectionFrom', 'sectionTo', 'runNumber']
    sdata = {'filter': query}
    path = 'api/%s/%s/%s/%s/none/data' \
        % (workspace, table, template, urllib.quote(','.join(columns)))
    callurl = os.path.join(url, path)
    result, _ = getdata(callurl, sdata, post=True)
    record = json.load(result)
    result.close()
    notations = {'lsRanges': 'lumi_section_ranges', 'number': 'run_number',
                 'runCreated': 'create_time', 'runNumber': 'run_number',
                 'stopTime': 'end_time', 'startTime': 'start_time',
                 'lsCount': 'lumi_sections', 'runStopReason': 'stop_reason',
                 'hltKeyDescription': 'hltkey', 'gtKey': 'gtkey',
                 'lhcEnergy': 'beam_e', 'l1Menu': 'l1key',
                 'modified': 'modify_time', 'runClassName': 'group_name'}
    for rec in record:
        for key, val in rec.items():
            if key in notations:
                rec[notations[key]] = val
                del rec[key]
        if table == 'runsummary':
            yield dict(run=rec)
        elif table == 'runlumis':
            if 'sectionTo' in rec and 'sectionFrom' in rec:
                rec['number'] = [i for i in
                                 range(rec.pop('sectionFrom'),
                                       rec.pop('sectionTo') + 1)]
            yield dict(lumi=rec)

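# A minimal usage sketch for worker_helper (the URL, run range, and filter
# layout are illustrative assumptions about the RunRegistry filter format):
#
#     url = 'http://runregistry.web.cern.ch/runregistry/'
#     query = {'number': '>= 160000 and <= 160100'}
#     for rec in worker_helper(url, query, table='runsummary'):
#         print(rec['run']['run_number'])
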
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS2/DBS3 call to get information about total number of
    files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    if which_dbs(dbs_url) == 'dbs': # DBS2 call
        query = 'find count(file.name), count(block.name)'
        query += ' where dataset=%s and dataset.status=*' % dataset
        dbs_args = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
                    'query': query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, headers, expire,
                                 ckey=CKEY, cert=CERT, system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if 'dataset' in row:
                totfiles = row['dataset']['count_file.name']
                totblocks = row['dataset']['count_block.name']
                return totblocks, totfiles
            elif 'error' in row:
                raise Exception(row.get('reason', row['error']))
        # if we're here we didn't find a dataset, throw the error
        msg = 'empty set'
        raise Exception(msg)
    else:
        # DBS3: call filesummaries?dataset=dataset to get number of files/blks
        dbs_url += '/filesummaries'
        dbs_args = {'dataset': dataset}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, headers, expire,
                                 ckey=CKEY, cert=CERT, system='dbs3')
        for row in json_parser(source, None):
            totfiles = row[0]['num_file']
            totblocks = row[0]['num_block']
            return totblocks, totfiles

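# A minimal usage sketch for dataset_summary (the DBS URL and dataset name are
# illustrative assumptions):
#
#     dbs_url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     totblocks, totfiles = dataset_summary(dbs_url, '/Primary/Processed-v1/TIER')
#     print('blocks=%s files=%s' % (totblocks, totfiles))
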
def dbs_dataset4release_parent(dbs_url, release, parent=None):
    "Get dataset for given release and optional parent dataset"
    expire = 600 # set some expire since we're not going to use it
    # we call datasets?release_version=release to get list of datasets
    dbs_url += '/datasets'
    dbs_args = {'release_version': release, 'dataset_access_type': 'VALID'}
    if parent:
        dbs_args.update({'parent_dataset': parent})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = getdata(dbs_url, dbs_args, headers, expire,
                             ckey=CKEY, cert=CERT, system='dbs3')
    for rec in json_parser(source, None):
        for row in rec:
            yield row['dataset']

def site_info(self, phedex_url, site):
    "Return Phedex site info about given site (rely on local cache)"
    # default tstamp of 0 forces a cache refresh on the very first call
    if abs(self.sites.get('tstamp', 0) - time.time()) > self.thr \
        or site not in self.sites:
        # need to update the cache, use Phedex API
        # https://cmsweb.cern.ch/phedex/datasvc/json/prod/nodes
        expire = self.thr
        args = {}
        api = phedex_url + '/nodes'
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(api, args, headers, expire, system='phedex')
        self.sites['tstamp'] = time.time()
        for rec in json_parser(source, None):
            for row in rec['phedex']['node']:
                self.sites[row['name']] = row['kind']
    return self.sites.get(site, 'NA')

def getdata(self, url, params, expire, headers=None, post=None):
    """URL call wrapper"""
    if url[-1] == '/':
        url = url[:-1]
    for key, val in params.items():
        url = '/'.join([url, val])
    params = {}
    return getdata(url, params, headers, expire, post,
                   self.error_expire, self.verbose,
                   self.ckey, self.cert, system=self.name)

def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS3 call to get information about total number of
    files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    # we call filesummaries?dataset=dataset to get number of files/blks
    dbs_url += '/filesummaries'
    dbs_args = {'dataset': dataset, 'validFileOnly': 1}
    headers = {'Accept': 'application/json;text/json'}
    source, expire = getdata(dbs_url, dbs_args, headers, expire,
                             ckey=CKEY, cert=CERT, system='dbs3')
    for row in json_parser(source, None):
        totfiles = row[0]['num_file']
        totblocks = row[0]['num_block']
        return totblocks, totfiles

def getdata(self, url, params, expire, headers=None, post=None):
    """URL call wrapper"""
    if not headers:
        headers = {"Accept": "application/json"} # DBS3 always needs that
    return getdata(url, params, headers, expire, post,
                   self.error_expire, self.verbose,
                   self.ckey, self.cert, doseq=False, system=self.name)

def datasets_dbs(urls, verbose=0):
    """DBS3 implementation of datasets function"""
    headers = {'Accept': 'application/json;text/json'}
    url = urls.get('dbs3') + '/datasets'
    params = {'detail': 'True', 'dataset_access_type': 'VALID'}
    data, _ = getdata(url, params, headers, post=False, verbose=verbose,
                      ckey=CKEY, cert=CERT, doseq=False, system='dbs3')
    records = json.load(data)
    data.close()
    dbsdata = {}
    for row in records:
        if row['dataset'] not in dbsdata:
            dbsdata[row['dataset']] = \
                dict(era=row['acquisition_era_name'],
                     tier=row['data_tier_name'], status='VALID')
    for row in phedex_info(urls, dbsdata):
        yield row

def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire = 600
    dataset = kwds.get('dataset', None)
    block = kwds.get('block_name', None)
    if not block:
        # TODO: this should go away when DBS2 is retired
        # (used in combined srv)
        block = kwds.get('block', None)
    lfn = kwds.get('file', None)
    runs = kwds.get('runs', [])
    if not (dataset or block or lfn):
        return
    url = '%s/%ss' % (url, entity) # DBS3 APIs use plural entity value
    if dataset:
        params = {'dataset': dataset}
    elif block:
        params = {'block_name': block}
    elif lfn:
        params = {'logical_file_name': lfn}
    if runs:
        params.update({'run_num': runs})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = getdata(url, params, headers, expire,
                             ckey=CKEY, cert=CERT, verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if isinstance(rec, basestring):
                    print(dastimestamp('DBS3 ERROR:'), row)
                elif entity == 'file':
                    yield rec['logical_file_name']
                elif entity == 'block':
                    yield rec['block_name']
                elif entity == 'run':
                    yield rec['run_num']
            except Exception as exp:
                msg = 'Fail to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)

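# A minimal usage sketch for dbs_find (the DBS URL and dataset name are
# illustrative assumptions):
#
#     url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     kwds = {'dataset': '/Primary/Processed-v1/TIER'}
#     for blk in dbs_find('block', url, kwds):
#         print(blk)
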
def proxy_getdata(urls):
    "Get data for given URLs via proxy server"
    try:
        # ping the proxy server with an empty URL list to see if it is alive
        result = [r for r in urlfetch_proxy([])]
    except Exception as _exc:
        result = []
    if len(result) == 1 and result[0] == {'ping': 'pong'}:
        for row in urlfetch_proxy(urls):
            yield row
    else: # sequential access
        error_expire = 60
        expire = 60
        post = False
        verbose = False
        params = {}
        headers = {}
        for url in urls:
            data, _ = getdata(url, params, headers, expire, post,
                              error_expire, verbose, CKEY, CERT)
            yield data.read()

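# A minimal usage sketch for proxy_getdata (the URL is an illustrative
# assumption; the function falls back to sequential fetches when the proxy
# does not answer the ping):
#
#     urls = ['https://cmsweb.cern.ch/dbs/prod/global/DBSReader/datatiers']
#     for payload in proxy_getdata(urls):
#         print(len(payload))
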
def datasets_dbs2(urls, verbose=0):
    """DBS2 implementation of datasets function"""
    headers = {'Accept': 'application/xml;text/xml'}
    url = urls.get('dbs')
    query = \
        'find dataset,dataset.tier,dataset.era where dataset.status like VALID*'
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', 'query': query}
    stream, _ = getdata(url, params, headers, post=False,
                        ckey=CKEY, cert=CERT, verbose=verbose, system='dbs')
    records = [r for r in qlxml_parser(stream, 'dataset')]
    stream.close()
    dbsdata = {}
    for row in records:
        dataset = row['dataset']
        if dataset['dataset'] not in dbsdata:
            dbsdata[dataset['dataset']] = \
                dict(era=dataset['dataset.era'],
                     tier=dataset['dataset.tier'], status='VALID')
    for row in phedex_info(urls, dbsdata):
        yield row

def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if entity not in ["run", "file", "block"]:
        msg = "Unsupported entity key=%s" % entity
        raise Exception(msg)
    expire = 600
    dataset = kwds.get("dataset", None)
    block = kwds.get("block_name", None)
    if not block:
        # TODO: this should go away when DBS2 is retired
        # (used in combined srv)
        block = kwds.get("block", None)
    lfn = kwds.get("file", None)
    runs = kwds.get("runs", [])
    if not (dataset or block or lfn):
        return
    url = "%s/%ss" % (url, entity)  # DBS3 APIs use plural entity value
    if dataset:
        params = {"dataset": dataset}
    elif block:
        params = {"block_name": block}
    elif lfn:
        params = {"logical_file_name": lfn}
    if runs:
        params.update({"run_num": runrange(runs[0], runs[-1], False)})
    headers = {"Accept": "application/json;text/json"}
    source, expire = getdata(url, params, headers, expire,
                             ckey=CKEY, cert=CERT, verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if isinstance(rec, basestring):
                    print(dastimestamp("DBS3 ERROR:"), row)
                elif entity == "file":
                    yield rec["logical_file_name"]
                elif entity == "block":
                    yield rec["block_name"]
                elif entity == "run":
                    yield rec["run_num"]
            except Exception as exp:
                msg = 'Fail to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)

def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600 # dummy number, we don't need it here
    ids = []
    source, expire = getdata(url, params, headers, expire,
                             ckey=CKEY, cert=CERT, verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            if not doc:
                continue
            if 'ProcConfigCacheID' in doc:
                ids.append(doc['ProcConfigCacheID'])
            elif 'ConfigCacheID' in doc:
                ids.append(doc['ConfigCacheID'])
            elif 'SkimConfigCacheID' in doc:
                ids.append(doc['SkimConfigCacheID'])
            else:
                if 'id' in rec and 'key' in rec and rec['key'] == dataset:
                    ids.append(rec['id'])
    return ids

def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr2 or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600 # dummy number, we don't need it here
    ids = []
    source, expire = getdata(url, params, headers, expire,
                             ckey=CKEY, cert=CERT, verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            found = 0
            if not doc:
                continue
            for key in doc.keys():
                if key.endswith("ConfigCacheID"):
                    ids.append(doc[key])
                    found += 1
            if not found:
                if 'id' in rec and 'key' in rec and rec['key'] == dataset:
                    if rec['id']:
                        ids.append(rec['id'])
    return ids

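# A minimal usage sketch for get_ids against the CouchDB view used by
# findReqMgrIds above (URL and dataset name are illustrative assumptions):
#
#     base = 'https://cmsweb.cern.ch'
#     url = base + \
#         '/couchdb/reqmgr_workload_cache/_design/ReqMgr/_view/byoutputdataset'
#     dataset = '/Primary/Processed-v1/TIER'
#     params = {'key': '"%s"' % dataset, 'include_docs': 'true'}
#     print(get_ids(url, params, dataset))
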
def site4dataset(dbs_url, phedex_api, args, expire):
    "Yield site information about given dataset"
    # DBS part
    dataset = args['dataset']
    try:
        totblocks, totfiles = dataset_summary(dbs_url, dataset)
    except Exception as err:
        error = 'combined service unable to process your request'
        reason = "Fail to parse #block, #files info, %s" % str(err)
        yield {'site': {'name': 'N/A', 'se': 'N/A',
                        'error': error, 'reason': reason}}
        return
    # Phedex part
    phedex_args = {'dataset': args['dataset']}
    headers = {'Accept': 'text/xml'}
    source, expire = getdata(phedex_api, phedex_args, headers, expire,
                             system='phedex')
    prim_key = 'block'
    tags = 'block.replica.node'
    site_info = {}
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        replicas = ddict.get('block.replica')
        if not isinstance(replicas, list):
            replicas = [replicas]
        for row in replicas:
            if not row or 'node' not in row:
                continue
            node = row['node']
            files = int(row['files'])
            complete = 1 if row['complete'] == 'y' else 0
            if node in site_info:
                files = site_info[node]['files'] + files
                nblks = site_info[node]['blocks'] + 1
                bc_val = site_info[node]['blocks_complete']
                b_complete = bc_val + 1 if complete else bc_val
            else:
                b_complete = 1 if complete else 0
                nblks = 1
            site_info[node] = {'files': files, 'blocks': nblks,
                               'blocks_complete': b_complete}
    row = {}
    for key, val in site_info.items():
        if totfiles:
            nfiles = '%5.2f%%' % (100 * float(val['files']) / totfiles)
        else:
            nfiles = 'N/A'
        if totblocks:
            nblks = '%5.2f%%' % (100 * float(val['blocks']) / totblocks)
        else:
            nblks = 'N/A'
        ratio = float(val['blocks_complete']) / val['blocks']
        b_completion = '%5.2f%%' % (100 * ratio)
        row = {'site': {'name': key, 'dataset_fraction': nfiles,
                        'block_fraction': nblks,
                        'block_completion': b_completion}}
        yield row

def helper(self, api, args, expire):
    """
    Class helper function which yields results for given set of input
    parameters. It yields the data record which must contain combined
    attribute corresponding to systems used to produce record content.
    """
    dbs_url = self.map[api]['services'][self.dbs]
    phedex_url = self.map[api]['services']['phedex']
    # make phedex_api from url, but use xml version for processing
    phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
    if api in ('dataset4site_release', 'dataset4site_release_parent',
               'child4site_release_dataset'):
        # DBS part
        datasets = set()
        release = args['release']
        parent = args.get('parent', None)
        for row in dbs_dataset4release_parent(dbs_url, release, parent):
            datasets.add(row)
        # Phedex part
        if args['site'].find('.') != -1: # it is SE
            phedex_args = {'dataset': list(datasets),
                           'se': '%s' % args['site']}
        else:
            phedex_args = {'dataset': list(datasets),
                           'node': '%s*' % args['site']}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(phedex_api, phedex_args, headers, expire,
                                 system='phedex')
        prim_key = 'block'
        tags = 'block.replica.node'
        found = {}
        for rec in xml_parser(source, prim_key, tags):
            ddict = DotDict(rec)
            block = ddict.get('block.name')
            bbytes = ddict.get('block.bytes')
            files = ddict.get('block.files')
            found_dataset = block.split('#')[0]
            if found_dataset in found:
                val = found[found_dataset]
                found[found_dataset] = {'bytes': val['bytes'] + bbytes,
                                        'files': val['files'] + files}
            else:
                found[found_dataset] = {'bytes': bbytes, 'files': files}
        for name, val in found.items():
            record = dict(name=name, size=val['bytes'], files=val['files'])
            if api == 'child4site_release_dataset':
                yield {'child': record}
            else:
                yield {'dataset': record}
        del datasets
        del found
    if api == 'site4dataset':
        try:
            gen = site4dataset(dbs_url, phedex_api, args, expire)
            for row in gen:
                sname = row.get('site', {}).get('name', '')
                skind = self.site_info(phedex_url, sname)
                row['site'].update({'kind': skind})
                yield row
        except Exception as err:
            print_exc(err)
            tstamp = dastimestamp('')
            msg = tstamp + ' Exception while processing DBS/Phedex info:'
            msg += str(err)
            row = {'site': {'name': 'Fail to look-up site info',
                            'error': msg,
                            'dataset_fraction': 'N/A',
                            'block_fraction': 'N/A',
                            'block_completion': 'N/A'},
                   'error': msg}
            yield row
    if api in ('files4dataset_runs_site', 'files4block_runs_site'):
        run_value = args.get('run', [])
        if isinstance(run_value, dict) and '$in' in run_value:
            runs = run_value['$in']
        elif isinstance(run_value, list):
            runs = run_value
        else:
            if int_number_pattern.match(str(run_value)):
                runs = [run_value]
            else:
                runs = []
        args.update({'runs': runs})
        files = dbs_find('file', dbs_url, args)
        site = args.get('site')
        phedex_api = phedex_url.replace('/json/', '/xml/') + '/fileReplicas'
        for fname in files4site(phedex_api, files, site):
            yield {'file': {'name': fname}}