def findReqMgrIds(dataset, base='https://cmsweb.cern.ch', verbose=False):
    """
    Find ReqMgr ids for a given dataset. This is a fairly complex procedure in
    CMS. We need to query the ReqMgr data-service cache and find workflow ids
    by output dataset name. ReqMgr returns either a document with the ids used
    by MCM (i.e. ProcConfigCacheID, ConfigCacheID, SkimConfigCacheID) or we can
    take the id of the request which bypassed MCM. For references see these
    discussions:

    https://github.com/dmwm/DAS/issues/4045
    https://hypernews.cern.ch/HyperNews/CMS/get/dmDevelopment/1501/1/1/1/1.html
    """
    params = {'key': '"%s"' % dataset, 'include_docs': 'true'}
    url = "%s/couchdb/reqmgr_workload_cache/_design/ReqMgr/_view/byoutputdataset" \
            % base
    headers = {'Accept': 'application/json;text/json'}
    expire = 600 # dummy number, we don't need it here
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    ids = []
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            if 'ProcConfigCacheID' in doc:
                ids.append(doc['ProcConfigCacheID'])
            elif 'ConfigCacheID' in doc:
                ids.append(doc['ConfigCacheID'])
            elif 'SkimConfigCacheID' in doc:
                ids.append(doc['SkimConfigCacheID'])
            else:
                if 'id' in rec and 'key' in rec and rec['key'] == dataset:
                    ids.append(rec['id'])
    return ids
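# Minimal usage sketch for findReqMgrIds (assumptions: this module is importable,
# the module-level CKEY/CERT constants point to a valid grid proxy/certificate,
# and the dataset name below is an illustrative placeholder, not a real example
# from the source).
if __name__ == '__main__':
    sample_dataset = '/SingleMuon/Run2016B-PromptReco-v2/AOD'  # hypothetical dataset
    for req_id in findReqMgrIds(sample_dataset, verbose=True):
        print(req_id)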
def dbs_dataset4site_release(dbs_url, release):
    "Get datasets for a given site and release"
    expire = 600 # set some expire since we're not going to use it
    if which_dbs(dbs_url) == 'dbs':
        # DBS2 path; in DBS3 we use the datasets API and pass release over there
        query = 'find dataset where release=%s' % release
        dbs_args = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query': query}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if 'dataset' in row:
                dataset = row['dataset']['dataset']
                yield dataset
            elif 'error' in row:
                err = row.get('reason', None)
                err = err if err else row['error']
                yield 'DBS error: %s' % err
    else:
        # we call datasets?release=release to get list of datasets
        dbs_url += '/datasets'
        dbs_args = \
            {'release_version': release, 'dataset_access_type': 'VALID'}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs3')
        for rec in json_parser(source, None):
            for row in rec:
                yield row['dataset']
def parser(self, dasquery, dformat, data, api):
    "DAS parser for MCM data-service"
    prim_key = self.dasmapping.primary_key(self.name, api)
    gen = json_parser(data, self.logger)
    counter = 0
    for rec in gen:
        if 'results' in rec:
            row = rec['results']
        else:
            row = rec
        for key in ['_id', '_rev']:
            if key in row:
                del row[key]
        if row:
            if api == 'dataset4mcm':
                for val in row.values():
                    if isinstance(val, basestring):
                        yield {'dataset': {'name': val}}
                    elif isinstance(val, list):
                        for vvv in val:
                            yield {'dataset': {'name': vvv}}
            else:
                yield {'mcm': row}
            counter += 1
    msg = "api=%s, format=%s " % (api, dformat)
    msg += "prim_key=%s yield %s rows" % (prim_key, counter)
    self.logger.info(msg)
def dataset_summary(dbs_url, getdata, dataset):
    """
    Invoke DBS2/DBS3 call to get information about the total number of
    files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    if which_dbs(dbs_url) == 'dbs2':
        # DBS2 call
        query = 'find count(file.name), count(block.name) where dataset=%s'\
                % dataset
        dbs_args = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query': query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            totfiles = row['dataset']['count_file.name']
            totblocks = row['dataset']['count_block.name']
            return totblocks, totfiles
    else:
        # we call filesummaries?dataset=dataset to get number of files/blks
        dbs_args = {'dataset': dataset}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        for row in json_parser(source, None):
            totfiles = row[0]['num_file']
            totblocks = row[0]['num_block']
            return totblocks, totfiles
def parser(self, dasquery, dformat, data, api):
    "DAS parser for MCM data-service"
    prim_key = self.dasmapping.primary_key(self.name, api)
    gen = json_parser(data, self.logger)
    counter = 0
    for rec in gen:
        if 'results' in rec:
            row = rec['results']
        else:
            row = rec
        for key in ['_id', '_rev']:
            if key in row:
                del row[key]
        if row:
            if api == 'dataset4mcm':
                for val in row.values():
                    if isinstance(val, basestring):
                        yield {'dataset': {'name': val}}
                    elif isinstance(val, list):
                        for vvv in val:
                            yield {'dataset': {'name': vvv}}
            else:
                yield {'mcm': row}
            counter += 1
    msg = "api=%s, format=%s " % (api, dformat)
    msg += "prim_key=%s yield %s rows" % (prim_key, counter)
    self.logger.info(msg)
def parser(self, dasquery, dformat, data, api):
    """
    DAS data parser. Input parameters:

    - *dasquery* input DAS query
    - *dformat* is a data format, e.g. XML, JSON
    - *data* is a data source, either file-like object or actual data
    - *api* is API name
    """
    prim_key = self.dasmapping.primary_key(self.name, api)
    apitag = self.dasmapping.apitag(self.name, api)
    counter = 0
    if dformat.lower() == 'xml':
        tags = self.dasmapping.api2daskey(self.name, api)
        gen = xml_parser(data, prim_key, tags)
        for row in gen:
            counter += 1
            yield row
    elif dformat.lower() == 'json' or dformat.lower() == 'dasjson':
        gen = json_parser(data, self.logger)
        das_dict = {}
        for row in gen:
            if dformat.lower() == 'dasjson':
                for key, val in row.iteritems():
                    if key != 'results':
                        das_dict[key] = val
                row = row['results']
            self.analytics.update_apicall(\
                    dasquery.mongo_query, das_dict)
            if apitag and row.has_key(apitag):
                row = row[apitag]
            if isinstance(row, list):
                for item in row:
                    if item.has_key(prim_key):
                        counter += 1
                        yield item
                    else:
                        counter += 1
                        yield {prim_key: item}
            else:
                if row.has_key(prim_key):
                    counter += 1
                    yield row
                else:
                    counter += 1
                    yield {prim_key: row}
    else:
        msg = 'Unsupported data format="%s", API="%s"' % (dformat, api)
        raise Exception(msg)
    msg = "api=%s, format=%s " % (api, dformat)
    msg += "prim_key=%s yield %s rows" % (prim_key, counter)
    self.logger.info(msg)
def parser(self, dasquery, dformat, data, api):
    """
    DAS data parser. Input parameters:

    - *dasquery* input DAS query
    - *dformat* is a data format, e.g. XML, JSON
    - *data* is a data source, either file-like object or actual data
    - *api* is API name
    """
    prim_key = self.dasmapping.primary_key(self.name, api)
    counter = 0
    if dformat.lower() == 'xml':
        tags = self.dasmapping.api2daskey(self.name, api)
        gen = xml_parser(data, prim_key, tags)
        for row in gen:
            counter += 1
            yield row
    elif dformat.lower() == 'json' or dformat.lower() == 'dasjson':
        gen = json_parser(data, self.logger)
        das_dict = {}
        for row in gen:
            if dformat.lower() == 'dasjson':
                for key, val in row.items():
                    if key != 'results':
                        das_dict[key] = val
                row = row['results']
            if isinstance(row, list):
                for item in row:
                    if item:
                        if prim_key in item:
                            counter += 1
                            yield item
                        else:
                            counter += 1
                            yield {prim_key: item}
            else:
                if prim_key in row:
                    counter += 1
                    yield row
                else:
                    counter += 1
                    yield {prim_key: row}
    else:
        msg = 'Unsupported data format="%s", API="%s"' % (dformat, api)
        raise Exception(msg)
    msg = "api=%s, format=%s " % (api, dformat)
    msg += "prim_key=%s yield %s rows" % (prim_key, counter)
    self.logger.info(msg)
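# Standalone illustration of the prim_key wrapping rule used by the parser above
# (this helper and its name are hypothetical, shown only to make the list/dict
# handling explicit): rows already keyed by the primary DAS key pass through,
# anything else gets wrapped under that key.
def _wrap_rows(rows, prim_key='dataset'):
    "Mimic the JSON row handling of the DAS parser for a single response payload"
    items = rows if isinstance(rows, list) else [rows]
    for row in items:
        yield row if prim_key in row else {prim_key: row}

# Example: list(_wrap_rows([{'dataset': {'name': '/a/b/c'}}, {'name': '/x/y/z'}]))
#   -> [{'dataset': {'name': '/a/b/c'}}, {'dataset': {'name': '/x/y/z'}}]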
def test_json_parser(self):
    """
    Test functionality of json_parser
    """
    jsondata = {'beer': {'amstel': 'good', 'guiness': 'better'}}
    fdesc = tempfile.NamedTemporaryFile()
    fname = fdesc.name
    stream = file(fname, 'w')
    stream.write(json.dumps(jsondata))
    stream.close()
    stream = file(fname, 'r')
    gen = json_parser(stream)
    result = gen.next()
    expect = {'beer': {'amstel': 'good', 'guiness': 'better'}}
    self.assertEqual(expect, result)
def test_json_parser(self):
    """
    Test functionality of json_parser
    """
    jsondata = {'beer': {'amstel': 'good', 'guiness': 'better'}}
    fdesc = tempfile.NamedTemporaryFile()
    fname = fdesc.name
    stream = open(fname, 'w')
    stream.write(json.dumps(jsondata))
    stream.close()
    stream = open(fname, 'r')
    gen = json_parser(stream)
    result = next(gen)
    expect = {'beer': {'amstel': 'good', 'guiness': 'better'}}
    self.assertEqual(expect, result)
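# A self-contained sketch of how this test method can be wired into a runnable
# unit-test module. The import path of json_parser is an assumption and should
# be adjusted to the actual project layout; everything else uses only the
# standard library.
import json
import os
import tempfile
import unittest

from DAS.utils.utils import json_parser  # assumed import path

class TestJsonParser(unittest.TestCase):
    def test_json_parser(self):
        jsondata = {'beer': {'amstel': 'good', 'guiness': 'better'}}
        # write the payload to a temporary file and keep it around for reading
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as fdesc:
            fdesc.write(json.dumps(jsondata))
            fname = fdesc.name
        try:
            with open(fname, 'r') as stream:
                result = next(json_parser(stream))
            self.assertEqual(jsondata, result)
        finally:
            os.remove(fname)

if __name__ == '__main__':
    unittest.main()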
def parser(self, dasquery, dformat, data, api):
    """
    DAS data parser. Input parameters:

    - *dasquery* input DAS query
    - *dformat* is a data format, e.g. XML, JSON
    - *data* is a data source, either file-like object or actual data
    - *api* is API name
    """
    prim_key = self.dasmapping.primary_key(self.name, api)
    counter = 0
    if dformat.lower() == "xml":
        tags = self.dasmapping.api2daskey(self.name, api)
        gen = xml_parser(data, prim_key, tags)
        for row in gen:
            counter += 1
            yield row
    elif dformat.lower() == "json" or dformat.lower() == "dasjson":
        gen = json_parser(data, self.logger)
        das_dict = {}
        for row in gen:
            if dformat.lower() == "dasjson":
                for key, val in row.iteritems():
                    if key != "results":
                        das_dict[key] = val
                row = row["results"]
            if isinstance(row, list):
                for item in row:
                    if prim_key in item:
                        counter += 1
                        yield item
                    else:
                        counter += 1
                        yield {prim_key: item}
            else:
                if prim_key in row:
                    counter += 1
                    yield row
                else:
                    counter += 1
                    yield {prim_key: row}
    else:
        msg = 'Unsupported data format="%s", API="%s"' % (dformat, api)
        raise Exception(msg)
    msg = "api=%s, format=%s " % (api, dformat)
    msg += "prim_key=%s yield %s rows" % (prim_key, counter)
    self.logger.info(msg)
def dbs_dataset4release_parent(dbs_url, release, parent=None):
    "Get dataset for given release and optional parent dataset"
    expire = 600 # set some expire since we're not going to use it
    # we call datasets?release=release to get list of datasets
    dbs_url += '/datasets'
    dbs_args = \
        {'release_version': release, 'dataset_access_type': 'VALID'}
    if parent:
        dbs_args.update({'parent_dataset': parent})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                system='dbs3')
    for rec in json_parser(source, None):
        for row in rec:
            yield row['dataset']
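# Minimal usage sketch for dbs_dataset4release_parent (assumptions: the DBS3
# reader URL is the standard CMS instance, the release name is an illustrative
# placeholder, and CKEY/CERT are configured with a valid grid proxy).
def print_datasets_for_release():
    dbs_url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
    release = 'CMSSW_10_6_30'  # placeholder release name
    for dataset in dbs_dataset4release_parent(dbs_url, release):
        print(dataset)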
def parser(self, query, dformat, source, api):
    """
    DBS3 data-service parser.
    """
    if api == 'site4dataset':
        sites = set()
        for rec in json_parser(source, self.logger):
            if isinstance(rec, list):
                for row in rec:
                    orig_site = row['origin_site_name']
                    if orig_site not in sites:
                        sites.add(orig_site)
            else:
                orig_site = rec.get('origin_site_name', None)
                if orig_site and orig_site not in sites:
                    sites.add(orig_site)
        for site in sites:
            yield {'site': {'name': site}}
    elif api == 'filesummaries':
        gen = DASAbstractService.parser(self, query, dformat, source, api)
        for row in gen:
            yield row['dataset']
    elif api == 'blockparents':
        gen = DASAbstractService.parser(self, query, dformat, source, api)
        for row in gen:
            try:
                del row['parent']['this_block_name']
            except:
                pass
            yield row
    elif api == 'fileparents':
        gen = DASAbstractService.parser(self, query, dformat, source, api)
        for row in gen:
            parent = row['parent']
            for val in parent['parent_logical_file_name']:
                yield dict(name=val)
    elif api == 'filechildren':
        gen = DASAbstractService.parser(self, query, dformat, source, api)
        for row in gen:
            parent = row['child']
            for val in parent['child_logical_file_name']:
                yield dict(name=val)
    else:
        gen = DASAbstractService.parser(self, query, dformat, source, api)
        for row in gen:
            yield row
def site_info(self, phedex_url, site):
    "Return Phedex site info about given site (rely on local cache)"
    if abs(self.sites.get('tstamp') - time.time()) > self.thr \
            or site not in self.sites:
        # need to update the cache
        # use Phedex API https://cmsweb.cern.ch/phedex/datasvc/json/prod/nodes
        expire = self.thr
        args = {}
        api = phedex_url + '/nodes'
        headers = {'Accept': 'application/json;text/json'}
        source, expire = \
            getdata(api, args, headers, expire, system='phedex')
        self.sites['tstamp'] = time.time()
        for rec in json_parser(source, None):
            for row in rec['phedex']['node']:
                self.sites[row['name']] = row['kind']
    return self.sites.get(site, 'NA')
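# Sketch of the state this method relies on (assumption: the owning class seeds
# a sites cache and a TTL threshold, e.g. in its constructor; the attribute
# names match those used in site_info above, the class name and TTL value are
# illustrative only).
class PhedexNodeCache(object):  # hypothetical holder class
    def __init__(self, thr=24*60*60):
        self.thr = thr                # cache lifetime in seconds
        self.sites = {'tstamp': 0}    # forces a refresh on the first lookup
# After the first lookup the cache mixes the timestamp with node records, e.g.
#   {'tstamp': 1700000000.0, 'T2_CH_CERN': 'Disk', ...}
# and site_info(...) returns the node 'kind' or 'NA' for unknown sites.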
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS3 call to get information about the total number of
    files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    # we call filesummaries?dataset=dataset to get number of files/blks
    dbs_url += '/filesummaries'
    dbs_args = {'dataset': dataset, 'validFileOnly': 1}
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                system='dbs3')
    for row in json_parser(source, None):
        totfiles = row[0]['num_file']
        totblocks = row[0]['num_block']
        return totblocks, totfiles
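# Minimal usage sketch for dataset_summary (assumptions: the DBS3 reader URL is
# the standard CMS instance, the dataset name is an illustrative placeholder,
# and CKEY/CERT point to a valid grid proxy). The function appends
# '/filesummaries' itself, so the base reader URL is passed in.
def print_dataset_summary():
    dbs_url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
    dataset = '/Sample/Primary-Dataset-v1/AOD'  # placeholder dataset name
    totblocks, totfiles = dataset_summary(dbs_url, dataset)
    print("blocks=%s files=%s" % (totblocks, totfiles))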
def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire = 600
    dataset = kwds.get('dataset', None)
    block = kwds.get('block_name', None)
    if not block:
        # TODO: this should go away when DBS will be retired (used in combined srv)
        block = kwds.get('block', None)
    lfn = kwds.get('file', None)
    runs = kwds.get('runs', [])
    if not (dataset or block or lfn):
        return
    url = '%s/%ss' % (url, entity) # DBS3 APIs use plural entity value
    if dataset:
        params = {'dataset': dataset}
    elif block:
        params = {'block_name': block}
    elif lfn:
        params = {'logical_file_name': lfn}
    if runs:
        params.update({'run_num': runs})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if isinstance(rec, basestring):
                    print(dastimestamp('DBS3 ERROR:'), row)
                elif entity == 'file':
                    yield rec['logical_file_name']
                elif entity == 'block':
                    yield rec['block_name']
                elif entity == 'run':
                    # the original second 'file' branch was unreachable;
                    # DBS3 run records expose the run number as 'run_num'
                    yield rec['run_num']
            except Exception as exp:
                msg = 'Fail to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)
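# Minimal usage sketch for dbs_find (assumptions: the DBS3 reader URL is the
# standard CMS instance, the dataset name is an illustrative placeholder, and
# CKEY/CERT point to a valid grid proxy).
def list_files_of_dataset():
    url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
    kwds = {'dataset': '/Sample/Primary-Dataset-v1/AOD'}  # placeholder dataset
    for lfn in dbs_find('file', url, kwds):
        print(lfn)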
def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if entity not in ["run", "file", "block"]:
        msg = "Unsupported entity key=%s" % entity
        raise Exception(msg)
    expire = 600
    dataset = kwds.get("dataset", None)
    block = kwds.get("block_name", None)
    if not block:
        # TODO: this should go away when DBS will be retired (used in combined srv)
        block = kwds.get("block", None)
    lfn = kwds.get("file", None)
    runs = kwds.get("runs", [])
    if not (dataset or block or lfn):
        return
    url = "%s/%ss" % (url, entity)  # DBS3 APIs use plural entity value
    if dataset:
        params = {"dataset": dataset}
    elif block:
        params = {"block_name": block}
    elif lfn:
        params = {"logical_file_name": lfn}
    if runs:
        params.update({"run_num": runrange(runs[0], runs[-1], False)})
    headers = {"Accept": "application/json;text/json"}
    source, expire = getdata(url, params, headers, expire,
                             ckey=CKEY, cert=CERT, verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if isinstance(rec, basestring):
                    print dastimestamp("DBS3 ERROR:"), row
                elif entity == "file":
                    yield rec["logical_file_name"]
                elif entity == "block":
                    yield rec["block_name"]
                elif entity == "run":
                    # the original second "file" branch was unreachable;
                    # DBS3 run records expose the run number as "run_num"
                    yield rec["run_num"]
            except Exception as exp:
                msg = 'Fail to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS2/DBS3 call to get information about the total number of
    files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    if which_dbs(dbs_url) == 'dbs':
        # DBS2 call
        query = 'find count(file.name), count(block.name)'
        query += ' where dataset=%s and dataset.status=*' % dataset
        dbs_args = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query': query}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if 'dataset' in row:
                totfiles = row['dataset']['count_file.name']
                totblocks = row['dataset']['count_block.name']
                return totblocks, totfiles
            elif 'error' in row:
                raise Exception(row.get('reason', row['error']))
        # if we're here we didn't find a dataset, throw the error
        msg = 'empty set'
        raise Exception(msg)
    else:
        # we call filesummaries?dataset=dataset to get number of files/blks
        dbs_url += '/filesummaries'
        dbs_args = {'dataset': dataset}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs3')
        for row in json_parser(source, None):
            totfiles = row[0]['num_file']
            totblocks = row[0]['num_block']
            return totblocks, totfiles
def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr2 or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600 # dummy number, we don't need it here
    ids = []
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            found = 0
            if not doc:
                continue
            for key in doc.keys():
                if key.endswith("ConfigCacheID"):
                    ids.append(doc[key])
                    found += 1
            if not found:
                if 'id' in rec and 'key' in rec and rec['key'] == dataset:
                    if rec['id']:
                        ids.append(rec['id'])
    return ids
def parser(self, dasquery, dformat, data, api):
    "DAS parser for MCM data-service"
    prim_key = self.dasmapping.primary_key(self.name, api)
    gen = json_parser(data, self.logger)
    counter = 0
    for rec in gen:
        if "results" in rec:
            row = rec["results"]
        else:
            row = rec
        for key in ["_id", "_rev"]:
            if key in row:
                del row[key]
        if row:
            if api == "dataset4mcm":
                for val in row.values():
                    yield {"dataset": {"name": val}}
            else:
                yield {"mcm": row}
            counter += 1
    msg = "api=%s, format=%s " % (api, dformat)
    msg += "prim_key=%s yield %s rows" % (prim_key, counter)
    self.logger.info(msg)
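# Standalone illustration of the record shapes the MCM parser above produces.
# The helper name and the sample record are hypothetical; the real payload comes
# from json_parser over the MCM data-service response.
def _mcm_rows(rec, api):
    "Mimic the per-record transformation of the MCM parser"
    row = rec.get('results', rec)
    for key in ('_id', '_rev'):
        row.pop(key, None)
    if not row:
        return
    if api == 'dataset4mcm':
        for val in row.values():
            yield {'dataset': {'name': val}}
    else:
        yield {'mcm': row}

# Example: list(_mcm_rows({'results': {'_id': '1', 'prepid': 'HIG-X-0001'}}, 'mcm'))
#   -> [{'mcm': {'prepid': 'HIG-X-0001'}}]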
def dbs_dataset4site_release(dbs_url, getdata, release):
    "Get datasets for a given site and release"
    expire = 600 # set some expire since we're not going to use it
    if which_dbs(dbs_url) == 'dbs2':
        # DBS2 path; in DBS3 we use the datasets API and pass release over there
        query = 'find dataset where release=%s' % release
        dbs_args = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query': query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            dataset = row['dataset']['dataset']
            yield dataset
    else:
        # we call datasets?release=release to get list of datasets
        dbs_args = \
            {'release_version': release, 'dataset_access_type': 'PRODUCTION'}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        for rec in json_parser(source, None):
            for row in rec:
                yield row['dataset']
def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600 # dummy number, we don't need it here
    ids = []
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            if not doc:
                continue
            if 'ProcConfigCacheID' in doc:
                ids.append(doc['ProcConfigCacheID'])
            elif 'ConfigCacheID' in doc:
                ids.append(doc['ConfigCacheID'])
            elif 'SkimConfigCacheID' in doc:
                ids.append(doc['SkimConfigCacheID'])
            else:
                if 'id' in rec and 'key' in rec and rec['key'] == dataset:
                    ids.append(rec['id'])
    return ids
def parser_helper(self, query, dformat, source, api):
    """
    DBS3 data-service parser helper, it is used by parser method.
    """
    if api == "site4dataset":
        gen = json_parser(source, self.logger)
    else:
        gen = DASAbstractService.parser(self, query, dformat, source, api)
    if api == "site4dataset":
        sites = set()
        for rec in gen:
            if isinstance(rec, list):
                for row in rec:
                    orig_site = row["origin_site_name"]
                    if orig_site not in sites:
                        sites.add(orig_site)
            else:
                orig_site = rec.get("origin_site_name", None)
                if orig_site and orig_site not in sites:
                    sites.add(orig_site)
        for site in sites:
            yield {"site": {"name": site}}
    elif api == "datasets" or api == "dataset_info":
        for row in gen:
            row["name"] = row["dataset"]
            del row["dataset"]
            yield {"dataset": row}
    elif api == "filesummaries":
        name = query.mongo_query["spec"]["dataset.name"]
        for row in gen:
            row["dataset"]["name"] = name
            yield row
    elif api == "summary4dataset_run" or api == "summary4block_run":
        spec = query.mongo_query.get("spec", {})
        dataset = spec.get("dataset.name", "")
        block = spec.get("block.name", "")
        run = spec.get("run.run_number", 0)
        if isinstance(run, dict):  # we got a run range
            if "$in" in run:
                run = run["$in"]
            elif "$lte" in run:
                run = range(run["$gte"], run["$lte"])
        for row in gen:
            if run:
                row.update({"run": run})
            if dataset:
                row.update({"dataset": dataset})
            if block:
                row.update({"block": block})
            yield row
    elif api == "blockorigin":
        for row in gen:
            yield row
    elif api == "blockparents":
        for row in gen:
            try:
                del row["parent"]["this_block_name"]
            except:
                pass
            yield row
    elif api == "fileparents":
        for row in gen:
            parent = row["parent"]
            for val in parent["parent_logical_file_name"]:
                yield dict(name=val)
    elif api == "runs_via_dataset" or api == "runs":
        for row in gen:
            values = row["run"]["run_num"]
            if isinstance(values, list):
                for val in values:
                    yield dict(run_number=val)
            else:
                yield dict(run_number=values)
    elif api == "filechildren":
        for row in gen:
            parent = row["child"]
            for val in parent["child_logical_file_name"]:
                yield dict(name=val)
    elif api == "files" or api == "files_via_dataset" or api == "files_via_block":
        status = "VALID"
        for row in gen:
            if "spec" in query.mongo_query:
                if "status.name" in query.mongo_query["spec"]:
                    status = query.mongo_query["spec"]["status.name"]
            file_status = row["file"]["is_file_valid"]
            if status == "INVALID":  # filter out valid files
                if int(file_status) == 1:  # valid status
                    row = None
            else:  # filter out invalid files
                if int(file_status) == 0:  # invalid status
                    row = None
            if row:
                yield row
    elif api == "filelumis" or api == "filelumis4block":
        for row in gen:
            if "lumi" in row:
                if "lumi_section_num" in row["lumi"]:
                    val = row["lumi"]["lumi_section_num"]
                    row["lumi"]["lumi_section_num"] = convert2ranges(val)
                yield row
            else:
                yield row
    else:
        for row in gen:
            yield row
def parser_helper(self, query, dformat, source, api):
    """
    DBS3 data-service parser helper, it is used by parser method.
    """
    if api in ['site4dataset', 'site4block']:
        gen = json_parser(source, self.logger)
    else:
        gen = DASAbstractService.parser(self, query, dformat, source, api)
    if api in ['site4dataset', 'site4block']:
        sites = set()
        for rec in gen:
            if isinstance(rec, list):
                for row in rec:
                    orig_site = row['origin_site_name']
                    if orig_site not in sites:
                        sites.add(orig_site)
            else:
                orig_site = rec.get('origin_site_name', None)
                if orig_site and orig_site not in sites:
                    sites.add(orig_site)
        for site in sites:
            yield {'site': {'name': site}}
    elif api == 'datasets' or api == 'dataset_info' or api == 'datasetlist':
        for row in gen:
            row['name'] = row['dataset']
            del row['dataset']
            yield {'dataset': row}
    elif api == 'filesummaries':
        name = query.mongo_query['spec']['dataset.name']
        for row in gen:
            row['dataset']['name'] = name
            yield row
    elif api == 'summary4dataset_run' or api == 'summary4block_run':
        spec = query.mongo_query.get('spec', {})
        dataset = spec.get('dataset.name', '')
        block = spec.get('block.name', '')
        run = spec.get('run.run_number', 0)
        if isinstance(run, dict):  # we got a run range
            if '$in' in run:
                run = run['$in']
            elif '$lte' in run:
                run = range(run['$gte'], run['$lte'])
        for row in gen:
            if run:
                row.update({"run": run})
            if dataset:
                row.update({"dataset": dataset})
            if block:
                row.update({"block": block})
            yield row
    elif api == 'releaseversions':
        for row in gen:
            values = row['release']['release_version']
            for val in values:
                yield dict(release=dict(name=val))
    elif api == 'datasetaccesstypes':
        for row in gen:
            values = row['status']['dataset_access_type']
            for val in values:
                yield dict(status=dict(name=val))
    elif api == 'blockorigin':
        for row in gen:
            yield row
    elif api == 'blockparents':
        for row in gen:
            try:
                del row['parent']['this_block_name']
            except:
                pass
            yield row
    elif api == 'fileparents':
        for row in gen:
            parent = row['parent']
            for val in parent['parent_logical_file_name']:
                yield dict(name=val)
    elif api == 'runs_via_dataset' or api == 'runs':
        for row in gen:
            values = row.get('run', {}).get('run_num', 'N/A')
            if isinstance(values, list):
                for val in values:
                    yield dict(run_number=val)
            else:
                yield dict(run_number=values)
    elif api == 'filechildren':
        for row in gen:
            parent = row['child']
            for val in parent['child_logical_file_name']:
                yield dict(name=val)
    elif api == 'files' or api == 'files_via_dataset' or \
        api == 'files_via_block':
        status = 'VALID'
        for row in gen:
            if 'spec' in query.mongo_query:
                if 'status.name' in query.mongo_query['spec']:
                    status = query.mongo_query['spec']['status.name']
            try:
                file_status = row['file']['is_file_valid']
            except KeyError:
                file_status = 0  # file status is unknown
            if status == '*':  # any file
                pass
            elif status == 'INVALID':  # filter out valid files
                if int(file_status) == 1:  # valid status
                    row = None
            else:  # filter out invalid files
                if int(file_status) == 0:  # invalid status
                    row = None
            if row:
                yield row
    elif api == 'filelumis' or api == 'filelumis4block':
        for row in gen:
            if 'lumi' in row:
                if 'lumi_section_num' in row['lumi']:
                    val = row['lumi']['lumi_section_num']
                    row['lumi']['lumi_section_num'] = convert2ranges(val)
                yield row
            else:
                yield row
    else:
        for row in gen:
            yield row
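# Hypothetical stand-in for the convert2ranges helper referenced in the
# filelumis branches above (its real implementation lives elsewhere in the
# project and may differ); this sketch only illustrates the idea of compressing
# a flat list of lumi section numbers into contiguous ranges.
def _group_into_ranges(numbers):
    "Group integers into [start, end] ranges, e.g. [1, 2, 3, 7] -> [[1, 3], [7, 7]]"
    ranges = []
    for num in sorted(set(numbers)):
        if ranges and num == ranges[-1][1] + 1:
            ranges[-1][1] = num  # extend the current range
        else:
            ranges.append([num, num])  # start a new range
    return ranges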