def get_file_run_lumis(url, api, args, verbose=0):
    """
    Helper function to deal with file,run,lumi requests.

    Normalizes ``args['run_num']`` into a list of runs (accepting a MongoDB
    ``{'$in': [...]}`` dict, a list, a single numeric value, or a JSON-style
    "[...]" string), resolves the blocks to process (given ``block_name`` or
    looked up via DBS), and yields rows from ``process_lumis_with`` keyed by
    the API name prefix.
    """
    run_value = args.get("run_num", [])
    if isinstance(run_value, dict) and "$in" in run_value:
        runs = run_value["$in"]
    elif isinstance(run_value, list):
        runs = run_value
    else:
        if int_number_pattern.match(str(run_value)):
            runs = [run_value]
        # guard against empty string: run_value[0] would raise IndexError
        elif run_value and run_value[0] == "[" and run_value[-1] == "]":
            runs = json.loads(run_value)
        else:
            runs = []
    args.update({"runs": runs})
    blk = args.get("block_name", None)
    if blk:  # we don't need to look-up blocks
        blocks = [blk]
    else:
        blocks = dbs_find("block", url, args, verbose)
    if not blocks:
        return
    gen = file_run_lumis(url, blocks, runs, verbose)
    # projection key derived from the API name; default covers file_run* APIs
    key = "file_run"
    if api.startswith("run_lumi"):
        key = "run"
    if api.startswith("file_lumi"):
        key = "file"
    if api.startswith("file_run_lumi"):
        key = "file_run"
    for row in process_lumis_with(key, gen):
        yield row
def adjust_value(val):
    """
    Adjust value to DAS patterns.

    Returns a DAS date for YYYYMMDD-looking strings, an int for integer
    strings, and the value unchanged otherwise.
    """
    if date_yyyymmdd_pattern.match(val):
        return das_dateformat(val)
    elif int_number_pattern.match(val):
        return int(val)
    return val
def url_args(url, convert_types=False):
    """
    Extract args from given url, e.g. http://a.b.com/api?arg1=1&arg2=2
    will yield {'arg1': '1', 'arg2': '2'}.

    :param url: URL whose query string is parsed (text after the last '?')
    :param convert_types: when True, integer-looking values are cast to int
    :returns: dict of query-string key/value pairs
    """
    args = {}
    for item in url.split("?")[-1].split('&'):
        key, value = item.split('=')
        # single condition replaces the original nested if/else; behavior is
        # identical: convert only when asked and the value looks like an int
        if convert_types and int_number_pattern.match(value):
            args[key] = int(value)
        else:
            args[key] = value
    return args
def size_format(uinput):
    """
    Format file size utility, it converts file size into KB, MB, GB, TB, PB
    units. Returns 'N/A' for non-numeric input.
    """
    if not (float_number_pattern.match(str(uinput)) or \
            int_number_pattern.match(str(uinput))):
        return 'N/A'
    try:
        num = float(uinput)
    except Exception as exc:
        print_exc(exc)
        return "N/A"
    base = 1000.  # power of 10, or use 1024. for power of 2
    for unit in ['', 'KB', 'MB', 'GB', 'TB', 'PB']:
        if num < base:
            return "%3.1f%s" % (num, unit)
        num /= base
    # NOTE(review): values >= 1000 PB fall through and return None — callers
    # appear to tolerate this, but confirm before relying on the return value
def get_block_run_lumis(url, api, args, verbose=0):
    """
    Helper function to deal with block,run,lumi requests.

    Normalizes ``args['run_num']`` into a list of runs, looks up the blocks
    via DBS, and yields rows from ``process_lumis_with`` with the
    'block_run' projection key.
    """
    run_value = args.get('run_num', [])
    if isinstance(run_value, dict) and '$in' in run_value:
        runs = run_value['$in']
    elif isinstance(run_value, list):
        runs = run_value
    else:
        if int_number_pattern.match(str(run_value)):
            runs = [run_value]
        else:
            runs = []
    args.update({'runs': runs})
    blocks = dbs_find('block', url, args, verbose)
    gen = block_run_lumis(url, blocks, runs, verbose)
    key = 'block_run'
    for row in process_lumis_with(key, gen):
        yield row
def get_file_run_lumis(url, api, args, verbose=0):
    """
    Helper function to deal with file,run,lumi requests.

    Normalizes ``args['run_num']`` into runs (MongoDB ``$in`` dict, list,
    single number, JSON-style "[...]" string, or a "[a-b]" range string),
    resolves blocks (given ``block_name`` or via DBS), and yields rows from
    ``process_lumis_with`` keyed by the API name prefix.
    """
    run_value = args.get('run_num', [])
    if isinstance(run_value, dict) and '$in' in run_value:
        runs = run_value['$in']
    elif isinstance(run_value, list):
        runs = run_value
    else:
        if int_number_pattern.match(str(run_value)):
            runs = [run_value]
        # guard against empty string: run_value[0] would raise IndexError
        elif run_value and run_value[0] == '[' and run_value[-1] == ']':
            if '-' in run_value:  # continuous range
                # NOTE(review): this produces a plain "a-b" string, not a
                # list; downstream code presumably handles the range form
                runs = run_value.replace("'", '').replace('[', '').replace(']', '')
            else:
                runs = json.loads(run_value)
        else:
            runs = run_value
    args.update({'runs': runs})
    blk = args.get('block_name', None)
    if blk:  # we don't need to look-up blocks
        blocks = [blk]
    else:
        blocks = dbs_find('block', url, args, verbose)
    if not blocks:
        return
    valid = 1 if args.get('validFileOnly', '') else 0
    gen = file_run_lumis(url, blocks, runs, valid, verbose)
    # projection key from the API name; the original also tested
    # 'file_run', 'file_run_lumi' and 'file_run_lumi_events' prefixes, but
    # all of them assigned the default 'file_run', so those checks are dropped
    key = 'file_run'
    if api.startswith('run_lumi'):
        key = 'run'
    if api.startswith('file_lumi'):
        key = 'file'
    for row in process_lumis_with(key, gen):
        yield row
def get_file_run_lumis(url, api, args):
    """
    Helper function to deal with file,run,lumi requests.

    Normalizes ``args['run']`` into a list of runs, looks up blocks via DBS,
    and yields rows from ``process_lumis_with`` keyed by the API name prefix.
    """
    run_value = args.get('run', [])
    if isinstance(run_value, dict) and '$in' in run_value:
        runs = run_value['$in']
    elif isinstance(run_value, list):
        runs = run_value
    else:
        if int_number_pattern.match(str(run_value)):
            runs = [run_value]
        else:
            runs = []
    args.update({'runs': runs})
    blocks = dbs_find('block', url, args)
    gen = file_run_lumis(url, blocks, runs)
    # BUGFIX: the original never initialized `key`, so any api not matching
    # the prefixes below raised UnboundLocalError; default to 'file_run' as
    # the sibling variants of this helper do
    key = 'file_run'
    if api.startswith('run_lumi'):
        key = 'run'
    if api.startswith('file_lumi'):
        key = 'file'
    if api.startswith('file_run_lumi'):
        key = 'file_run'
    for row in process_lumis_with(key, gen):
        yield row
def lumi_evts(rdict):
    """
    Helper function to show lumi-events pairs suitable for web UI.

    Builds a hidden div of "Lumi: L, Events E" lines plus a show/hide link;
    totals the integer event counts when available.

    :param rdict: dict with 'Run number', 'Luminosity number' (list) and
        'Events' (list, entries may be 'NA') keys
    :returns: HTML snippet as a string
    """
    run = rdict['Run number']
    lumis = rdict['Luminosity number']
    events = rdict['Events']
    pdict = dict(zip(lumis, events))
    pkeys = [str(k) for k in pdict.keys()]
    tag = 'id_%s_%s' % (run, ''.join(pkeys))
    link = 'link_%s_%s' % (run, ''.join(pkeys))
    hout = '<div class="hide" id="%s" name="%s">' % (tag, tag)
    tot_evts = 0
    for idx, lumi in enumerate(sorted(pdict.keys())):
        evts = pdict[lumi]
        # count only real, positive integer event values
        if evts != 'NA' and evts and int_number_pattern.match(str(evts)):
            tot_evts += int(evts)
        hout += 'Lumi: %s, Events %s<br/>' % (lumi, evts)
    hout += "</div>"
    out = """ <em>lumis/events pairs</em>\
<a href="javascript:ToggleTag('%s', '%s')" id="%s">show</a>""" \
        % (tag, link, link)
    if tot_evts:
        out += ' Total events=%s' % tot_evts
    out += hout
    return out
def helper(self, api, args, expire):
    """
    Class helper function which yields results for given set of input
    parameters. It yields the data record which must contain combined
    attribute corresponding to systems used to produce record content.
    """
    dbs_url = self.map[api]['services'][self.dbs]
    phedex_url = self.map[api]['services']['phedex']
    # make phedex_api from url, but use xml version for processing
    phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
    if api == 'dataset4site_release' or \
       api == 'dataset4site_release_parent' or \
       api == 'child4site_release_dataset':
        # DBS part
        datasets = set()
        release = args['release']
        parent = args.get('parent', None)
        for row in dbs_dataset4release_parent(dbs_url, release, parent):
            datasets.add(row)
        # Phedex part
        if args['site'].find('.') != -1:  # it is SE
            phedex_args = {'dataset': list(datasets),
                           'se': '%s' % args['site']}
        else:
            phedex_args = {'dataset': list(datasets),
                           'node': '%s*' % args['site']}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(phedex_api, phedex_args, headers, expire, system='phedex')
        prim_key = 'block'
        tags = 'block.replica.node'
        found = {}
        # aggregate block sizes/file counts per dataset
        for rec in xml_parser(source, prim_key, tags):
            ddict = DotDict(rec)
            block = ddict.get('block.name')
            bbytes = ddict.get('block.bytes')
            files = ddict.get('block.files')
            found_dataset = block.split('#')[0]
            if found_dataset in found:
                val = found[found_dataset]
                found[found_dataset] = {'bytes': val['bytes'] + bbytes,
                                        'files': val['files'] + files}
            else:
                found[found_dataset] = {'bytes': bbytes, 'files': files}
        for name, val in found.items():
            record = dict(name=name, size=val['bytes'], files=val['files'])
            if api == 'child4site_release_dataset':
                yield {'child': record}
            else:
                yield {'dataset': record}
        del datasets
        del found
    if api == 'site4block':
        pass
    if api == 'site4dataset':
        try:
            gen = site4dataset(dbs_url, phedex_api, args, expire)
            for row in gen:
                sname = row.get('site', {}).get('name', '')
                skind = self.site_info(phedex_url, sname)
                row['site'].update({'kind': skind})
                yield row
        except Exception as err:
            # best-effort: report a synthetic error row instead of raising
            print_exc(err)
            tstamp = dastimestamp('')
            msg = tstamp + ' Exception while processing DBS/Phedex info:'
            msg += str(err)
            row = {'site': {'name': 'Fail to look-up site info',
                            'error': msg,
                            'dataset_fraction': 'N/A',
                            'block_fraction': 'N/A',
                            'block_completion': 'N/A'},
                   'error': msg}
            yield row
    if api == 'files4dataset_runs_site' or \
       api == 'files4block_runs_site':
        run_value = args.get('run', [])
        if isinstance(run_value, dict) and '$in' in run_value:
            runs = run_value['$in']
        elif isinstance(run_value, list):
            runs = run_value
        else:
            if int_number_pattern.match(str(run_value)):
                runs = [run_value]
            else:
                runs = []
        args.update({'runs': runs})
        files = dbs_find('file', dbs_url, args)
        site = args.get('site')
        phedex_api = phedex_url.replace('/json/', '/xml/') + '/fileReplicas'
        for fname in files4site(phedex_api, files, site):
            yield {'file': {'name': fname}}
def helper(self, api, args, expire):
    """
    Class helper function which yields results for given set of input
    parameters. It yields the data record which must contain combined
    attribute corresponding to systems used to produce record content.
    """
    dbs_url = self.map[api]['services'][self.dbs]
    phedex_url = self.map[api]['services']['phedex']
    # make phedex_api from url, but use xml version for processing
    phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
    if api == 'dataset4site_release' or \
       api == 'dataset4site_release_parent' or \
       api == 'child4site_release_dataset':
        # DBS part
        datasets = set()
        release = args['release']
        parent = args.get('parent', None)
        for row in dbs_dataset4release_parent(dbs_url, release, parent):
            datasets.add(row)
        # Phedex part
        if args['site'].find('.') != -1:  # it is SE
            phedex_args = {
                'dataset': list(datasets),
                'se': '%s' % args['site']
            }
        else:
            phedex_args = {
                'dataset': list(datasets),
                'node': '%s*' % args['site']
            }
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(phedex_api, phedex_args, headers, expire, system='phedex')
        prim_key = 'block'
        tags = 'block.replica.node'
        found = {}
        # aggregate block sizes/file counts per dataset
        for rec in xml_parser(source, prim_key, tags):
            ddict = DotDict(rec)
            block = ddict.get('block.name')
            bbytes = ddict.get('block.bytes')
            files = ddict.get('block.files')
            found_dataset = block.split('#')[0]
            if found_dataset in found:
                val = found[found_dataset]
                found[found_dataset] = {
                    'bytes': val['bytes'] + bbytes,
                    'files': val['files'] + files
                }
            else:
                found[found_dataset] = {'bytes': bbytes, 'files': files}
        for name, val in found.items():
            record = dict(name=name, size=val['bytes'], files=val['files'])
            if api == 'child4site_release_dataset':
                yield {'child': record}
            else:
                yield {'dataset': record}
        del datasets
        del found
    if api == 'site4dataset':
        try:
            gen = site4dataset(dbs_url, phedex_api, args, expire)
            for row in gen:
                sname = row.get('site', {}).get('name', '')
                skind = self.site_info(phedex_url, sname)
                row['site'].update({'kind': skind})
                yield row
        except Exception as err:
            # best-effort: report a synthetic error row instead of raising
            print_exc(err)
            tstamp = dastimestamp('')
            msg = tstamp + ' Exception while processing DBS/Phedex info:'
            msg += str(err)
            row = {
                'site': {
                    'name': 'Fail to look-up site info',
                    'error': msg,
                    'dataset_fraction': 'N/A',
                    'block_fraction': 'N/A',
                    'block_completion': 'N/A'
                },
                'error': msg
            }
            yield row
    if api == 'files4dataset_runs_site' or \
       api == 'files4block_runs_site':
        run_value = args.get('run', [])
        if isinstance(run_value, dict) and '$in' in run_value:
            runs = run_value['$in']
        elif isinstance(run_value, list):
            runs = run_value
        else:
            if int_number_pattern.match(str(run_value)):
                runs = [run_value]
            else:
                runs = []
        args.update({'runs': runs})
        files = dbs_find('file', dbs_url, args)
        site = args.get('site')
        phedex_api = phedex_url.replace('/json/', '/xml/') + '/fileReplicas'
        for fname in files4site(phedex_api, files, site):
            yield {'file': {'name': fname}}
def parse_filter(spec, flt):
    """
    Parse given filter and return MongoDB key/value dictionary.
    Be smart not to overwrite spec condition of DAS query.

    Supported filter forms: key=val, key!=val, key<=val, key<val,
    key>=val, key>val, and a bare key (translated to {'$exists': True}
    unless the key is already constrained in spec or equals 'unique').
    """
    if flt.find('=') != -1 and flt.find('!=') == -1 and \
            (flt.find('<') == -1 and flt.find('>') == -1):
        key, val = flt.split('=')
        if int_number_pattern.match(str(val)):
            val = int(val)
        elif float_number_pattern.match(str(val)):
            val = float(val)
        elif isinstance(val, str) or isinstance(val, unicode):
            if val.find('*') != -1:
                # wildcard -> regex match
                val = re.compile('%s' % val.replace('*', '.*'))
            val = parse_filter_string(val)
        return {key: val}
    elif flt.find('!=') != -1 and \
            (flt.find('<') == -1 and flt.find('>') == -1):
        key, val = flt.split('!=')
        if int_number_pattern.match(str(val)):
            val = int(val)
        elif float_number_pattern.match(str(val)):
            val = float(val)
        elif isinstance(val, str) or isinstance(val, unicode):
            # strings: negate via regex (tempered lookahead), since $ne on a
            # plain string would not honor wildcards
            if val.find('*') != -1:
                # val = re.compile('%s' % val.replace('*', '.*'))
                val = re.compile('^(?:(?!%s).)*$' % val.replace('*', '.*'))
            else:
                val = re.compile('^(?:(?!%s).)*$' % val)
            val = parse_filter_string(val)
            return {key: val}
        # numeric values fall through to a plain $ne condition
        return {key: {'$ne': val}}
    elif flt.find('<=') != -1:
        key, val = flt.split('<=')
        if int_number_pattern.match(str(val)):
            val = int(val)
        if float_number_pattern.match(str(val)):
            val = float(val)
        return {key: {'$lte': val}}
    elif flt.find('<') != -1:
        key, val = flt.split('<')
        if int_number_pattern.match(str(val)):
            val = int(val)
        if float_number_pattern.match(str(val)):
            val = float(val)
        return {key: {'$lt': val}}
    elif flt.find('>=') != -1:
        key, val = flt.split('>=')
        if int_number_pattern.match(str(val)):
            val = int(val)
        if float_number_pattern.match(str(val)):
            val = float(val)
        return {key: {'$gte': val}}
    elif flt.find('>') != -1:
        key, val = flt.split('>')
        if int_number_pattern.match(str(val)):
            val = int(val)
        if float_number_pattern.match(str(val)):
            val = float(val)
        return {key: {'$gt': val}}
    else:
        # bare key: require existence unless already in spec or 'unique'
        if not spec.get(flt, None) and flt != 'unique':
            return {flt: {'$exists': True}}
    return {}
def parse(self, query):
    """
    Parse input query.

    Tokenizes the relaxed DAS query and builds a MongoDB-style dict with
    'fields' and 'spec' entries plus optional 'filters', 'aggregators',
    'instance' and 'system'. Unused locals of the original ('keys', 'pipe',
    'oper') have been removed; behavior is unchanged.
    """
    spec = {}
    filters = {}
    aggregators = []
    fields = []
    relaxed_query = relax(query, self.operators).split()
    if self.verbose:
        print("\n### input query=%s, relaxed=%s" % (query, relaxed_query))
    tot = len(relaxed_query)
    idx = 0
    while idx < tot:
        item = relaxed_query[idx]
        if self.verbose > 1:
            print("parse item", item)
        if item == '|':
            step = self.parse_pipe(relaxed_query[idx:], filters, aggregators)
            idx += step
        if item == ',':
            idx += 1
            continue
        next_elem = relaxed_query[idx+1] if idx+1 < tot else None
        next_next_elem = relaxed_query[idx+2] if idx+2 < tot else None
        if self.verbose > 1:
            print("### parse items", item, next_elem, next_next_elem)
        if next_elem and (next_elem == ',' or next_elem in self.daskeys):
            # bare das key followed by another key or a comma
            if item in self.daskeys:
                fields.append(item)
            idx += 1
            continue
        elif next_elem in self.operators:
            if item not in self.daskeys + self.specials:
                error(relaxed_query, idx, 'Wrong DAS key')
            if next_next_elem.startswith('['):
                val, step = parse_array(relaxed_query[idx:], next_elem, item)
                spec.update(spec_entry(item, next_elem, val))
                idx += step
            elif next_elem in ['in', 'beetween'] and \
                    not next_next_elem.startswith('['):
                # NOTE(review): 'beetween' matches the operator token spelled
                # elsewhere in the grammar — do not "fix" without checking
                msg = '"%s" operator ' % next_elem
                msg += 'should be followed by square bracket value'
                error(relaxed_query, idx, msg)
            elif next_next_elem.startswith('"'):
                val, step = parse_quotes(relaxed_query[idx:], '"')
                spec.update(spec_entry(item, next_elem, val))
                idx += step
            elif next_next_elem.startswith("'"):
                val, step = parse_quotes(relaxed_query[idx:], "'")
                spec.update(spec_entry(item, next_elem, val))
                idx += step
            else:
                # plain value: coerce to float/int (but keep YYYYMMDD dates)
                if float_number_pattern.match(next_next_elem):
                    next_next_elem = float(next_next_elem)
                elif int_number_pattern.match(next_next_elem) and \
                        not date_yyyymmdd_pattern.match(next_next_elem):
                    next_next_elem = int(next_next_elem)
                elif next_next_elem in self.daskeys:
                    msg = 'daskey operator daskey structure is not allowed'
                    error(relaxed_query, idx, msg)
                spec.update(spec_entry(item, next_elem, next_next_elem))
                idx += 3
            continue
        elif item == '|':
            step = self.parse_pipe(relaxed_query[idx:], filters, aggregators)
            idx += step
        elif not next_elem and not next_next_elem:
            # trailing single token must be a das key
            if item in self.daskeys:
                fields.append(item)
                idx += 1
            else:
                error(relaxed_query, idx, 'Not a DAS key')
        else:
            error(relaxed_query, idx)
    out = {}
    for word in ['instance', 'system']:
        if word in spec:
            out[word] = spec.pop(word)
    if not fields:
        fields = [k for k in spec.keys() if k in self.daskeys]
        if len(fields) > 1:
            fields = None  # ambiguous spec, we don't know which field to look-up
    if fields and not spec:
        error(relaxed_query, 0, 'No condition specified')
    out['fields'] = fields
    out['spec'] = spec
    # perform cross-check of filter values
    for key, item in filters.items():
        if key not in ['grep', 'sort']:
            continue
        for val in item:
            daskeyvalue_check(query, val, self.daskeys)
    # perform cross-check of aggregator values
    for _, val in aggregators:
        daskeyvalue_check(query, val, self.daskeys)
    if filters:
        out['filters'] = filters
    if aggregators:
        out['aggregators'] = aggregators
    if self.verbose:
        print("MongoDB query: %s" % out)
    return out
def parse_helper(self, query):
    """
    Parse input query.

    Tokenizes the relaxed DAS query and builds a MongoDB-style dict with
    'fields' and 'spec' entries plus optional 'filters', 'aggregators',
    'instance' and 'system'. Unused locals of the original ('keys', 'pipe',
    'oper') have been removed; behavior is unchanged.
    """
    spec = {}
    filters = {}
    aggregators = []
    fields = []
    relaxed_query = relax(query, self.operators).split()
    if self.verbose:
        print("\n### input query=%s, relaxed=%s" % (query, relaxed_query))
    tot = len(relaxed_query)
    idx = 0
    while idx < tot:
        item = relaxed_query[idx]
        if self.verbose > 1:
            print("parse item", item)
        if item == '|':
            step = self.parse_pipe(relaxed_query[idx:], filters, aggregators)
            idx += step
        if item == ',':
            idx += 1
            continue
        next_elem = relaxed_query[idx + 1] if idx + 1 < tot else None
        next_next_elem = relaxed_query[idx + 2] if idx + 2 < tot else None
        if self.verbose > 1:
            print("### parse items", item, next_elem, next_next_elem)
        if next_elem and (next_elem == ',' or next_elem in self.daskeys):
            # bare das key followed by another key or a comma
            if item in self.daskeys:
                fields.append(item)
            idx += 1
            continue
        elif next_elem in self.operators:
            if item not in self.daskeys + self.specials:
                error(relaxed_query, idx, 'Wrong DAS key')
            if next_next_elem.startswith('['):
                val, step = parse_array(relaxed_query[idx:], next_elem, item)
                spec.update(spec_entry(item, next_elem, val))
                idx += step
            elif next_elem in ['in', 'beetween'] and \
                    not next_next_elem.startswith('['):
                # NOTE(review): 'beetween' matches the operator token spelled
                # elsewhere in the grammar — do not "fix" without checking
                msg = '"%s" operator ' % next_elem
                msg += 'should be followed by square bracket value'
                error(relaxed_query, idx, msg)
            elif next_next_elem.startswith('"'):
                val, step = parse_quotes(relaxed_query[idx:], '"')
                spec.update(spec_entry(item, next_elem, val))
                idx += step
            elif next_next_elem.startswith("'"):
                val, step = parse_quotes(relaxed_query[idx:], "'")
                spec.update(spec_entry(item, next_elem, val))
                idx += step
            else:
                # plain value: coerce to float/int (but keep YYYYMMDD dates)
                if float_number_pattern.match(next_next_elem):
                    next_next_elem = float(next_next_elem)
                elif int_number_pattern.match(next_next_elem) and \
                        not date_yyyymmdd_pattern.match(next_next_elem):
                    next_next_elem = int(next_next_elem)
                elif next_next_elem in self.daskeys:
                    msg = 'daskey operator daskey structure is not allowed'
                    error(relaxed_query, idx, msg)
                spec.update(spec_entry(item, next_elem, next_next_elem))
                idx += 3
            continue
        elif item == '|':
            step = self.parse_pipe(relaxed_query[idx:], filters, aggregators)
            idx += step
        elif not next_elem and not next_next_elem:
            # trailing single token must be a das key
            if item in self.daskeys:
                fields.append(item)
                idx += 1
            else:
                error(relaxed_query, idx, 'Not a DAS key')
        else:
            error(relaxed_query, idx)
    out = {}
    for word in ['instance', 'system']:
        if word in spec:
            out[word] = spec.pop(word)
    if not fields:
        fields = [k for k in spec.keys() if k in self.daskeys]
        if len(fields) > 1:
            fields = None  # ambiguous spec, we don't know which field to look-up
    if fields and not spec:
        error(relaxed_query, 0, 'No condition specified')
    out['fields'] = fields
    out['spec'] = spec
    # perform cross-check of filter values
    for key, item in filters.items():
        if key not in ['grep', 'sort']:
            continue
        for val in item:
            daskeyvalue_check(query, val, self.daskeys)
    # perform cross-check of aggregator values
    for _, val in aggregators:
        daskeyvalue_check(query, val, self.daskeys)
    if filters:
        out['filters'] = filters
    if aggregators:
        out['aggregators'] = aggregators
    if self.verbose:
        print("MongoDB query: %s" % out)
    return out