def test_das_sinfo(self):
    "Test das_sinfo function"
    das = {'system': ['dbs', 'dbs', 'phedex'], 'api': ['one', 'two', 'thr']}
    row = {'das': das, 'foo': [{'dbs': 1}, {'a': 1}, {'phedex': 1}]}
    result = das_sinfo(row)
    expect = {'dbs': set(['one', 'two']), 'phedex': set(['thr'])}
    self.assertEqual(expect, result)
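# A minimal sketch of the das_sinfo helper exercised by the test above, inferred
# only from the expected input/output of that test (not necessarily the actual
# DAS implementation): it walks the parallel 'system' and 'api' lists of a
# record's 'das' section and maps each data-service name to the set of APIs
# which contributed to the record.
def das_sinfo(row):
    "Return a {system: set(apis)} mapping for a DAS record (sketch)."
    sinfo = {}
    das = row.get('das', {})
    for srv, api in zip(das.get('system', []), das.get('api', [])):
        sinfo.setdefault(srv, set()).add(api)
    return sinfo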
def get_from_cache(self, dasquery, idx=0, limit=0, collection='merge'):
    """
    Look-up results from the merge cache and yield them for
    further processing.
    """
    das_timer('DASCore::get_from_cache', self.verbose)
    msg = 'col=%s, query=%s, idx=%s, limit=%s'\
        % (collection, dasquery, idx, limit)
    self.logger.info(msg)
    fields = dasquery.mongo_query.get('fields', None)
    if dasquery.mapreduce:
        res = self.rawcache.map_reduce(dasquery.mapreduce, dasquery)
    elif dasquery.aggregators:
        # extract das information from rawcache
        rows = self.rawcache.get_from_cache(dasquery, collection=collection)
        first = rows.next()
        sinfo = das_sinfo(first)
        # to perform aggregation we need to:
        # - loop over all aggregator functions
        # - loop over all data-services
        # - loop over all APIs within a data-service
        # the code below does that, it applies an aggregator
        # to selected (based on key/srv/api) records
        res = []
        _id = 0
        time0 = time.time()
        expire = 300 # min expire
        for func, key in dasquery.aggregators:
            afunc = getattr(das_aggregator, 'das_%s' % func)
            found = False
            for srv, apis in sinfo.items():
                for api in apis:
                    rows = self.rawcache.get_from_cache(dasquery, collection=collection)
                    gen = api_rows(rows, api)
                    data = afunc(key, gen)
                    ctime = time.time() - time0
                    das = dasheader(srv, dasquery, expire, api=api, ctime=ctime)
                    if isinstance(data, dict) and data['value'] != 'N/A':
                        aggr = {'_id': _id, 'function': func, 'key': key, 'result': data}
                        aggr.update(das)
                        res.append(aggr)
                        _id += 1
                        found = True
            if not found: # when we got nothing add empty result record
                empty = {'value': 'N/A'}
                ctime = time.time() - time0
                das = dasheader('das', dasquery, expire, api='das_core', ctime=ctime)
                rec = {'_id': 0, 'function': func, 'key': key, 'result': empty}
                rec.update(das)
                res.append(rec)
    elif isinstance(fields, list) and 'queries' in fields:
        res = itertools.islice(self.get_queries(dasquery), idx, idx + limit)
    else:
        res = self.rawcache.get_from_cache(dasquery, idx, limit, collection=collection)
    for row in res:
        fix_times(row)
        yield row
    das_timer('DASCore::get_from_cache', self.verbose)
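# A minimal sketch of the api_rows generator used in the aggregation loop above,
# inferred from its call site rather than the actual DAS implementation: it
# narrows the raw-cache records down to those produced by a given API, so the
# aggregator function only sees rows from one data-service API at a time.
def api_rows(rows, api):
    "Yield records whose das.api list contains the given API name (sketch)."
    for row in rows:
        if api in row.get('das', {}).get('api', []):
            yield row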
def get_from_cache(self, dasquery, idx=0, limit=0, collection='merge'):
    """
    Look-up results from the merge cache and yield them for
    further processing.
    """
    das_timer('DASCore::get_from_cache', self.verbose)
    msg = 'col=%s, query=%s, idx=%s, limit=%s'\
        % (collection, dasquery, idx, limit)
    self.logger.info(msg)
    fields = dasquery.mongo_query.get('fields', None)
    if dasquery.mapreduce:
        res = self.rawcache.map_reduce(dasquery.mapreduce, dasquery)
    elif dasquery.aggregators:
        # extract das information from rawcache
        rows = self.rawcache.get_from_cache(dasquery, collection=collection)
        first = next(rows)
        sinfo = das_sinfo(first)
        # to perform aggregation we need to:
        # - loop over all aggregator functions
        # - loop over all data-services
        # - loop over all APIs within a data-service
        # the code below does that, it applies an aggregator
        # to selected (based on key/srv/api) records
        res = []
        _id = 0
        time0 = time.time()
        expire = 300 # min expire
        for func, key in dasquery.aggregators:
            afunc = getattr(das_aggregator, 'das_%s' % func)
            found = False
            for srv, apis in sinfo.items():
                for api in apis:
                    rows = self.rawcache.get_from_cache(dasquery, collection=collection)
                    gen = api_rows(rows, api)
                    data = afunc(key, gen)
                    ctime = time.time() - time0
                    das = dasheader(srv, dasquery, expire, api=api, ctime=ctime)
                    if isinstance(data, dict) and data['value'] != 'N/A':
                        aggr = {'_id': _id, 'function': func, 'key': key, 'result': data}
                        aggr.update(das)
                        res.append(aggr)
                        _id += 1
                        found = True
            if not found: # when we got nothing add empty result record
                empty = {'value': 'N/A'}
                ctime = time.time() - time0
                das = dasheader('das', dasquery, expire, api='das_core', ctime=ctime)
                rec = {'_id': 0, 'function': func, 'key': key, 'result': empty}
                rec.update(das)
                res.append(rec)
    else:
        res = self.rawcache.get_from_cache(dasquery, idx, limit, collection=collection)
    # we assume that all records from a single query will have an
    # identical structure, therefore it is sufficient to update the
    # keylearning DB only with the first record
    count = 0
    for row in res:
        if not count:
            self.keylearning.add_record(dasquery, row)
        fix_times(row)
        yield row
        count += 1
    das_timer('DASCore::get_from_cache', self.verbose)
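# A minimal sketch of an aggregator function compatible with the loop above,
# based only on how afunc is invoked there: it is looked up as das_<func> in the
# das_aggregator module, called as afunc(key, gen), and expected to return a
# dict whose 'value' entry is 'N/A' when no records match. The real
# das_aggregator functions may differ; das_count_sketch is a hypothetical helper
# which counts the records carrying the requested key.
def das_count_sketch(key, rows):
    "Count records which contain the top-level entity of the given key (sketch)."
    count = 0
    for row in rows:
        if key.split('.')[0] in row:
            count += 1
    if not count:
        return {'key': key, 'value': 'N/A'}
    return {'key': key, 'value': count}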