def getdata_helper(self, url, params, expire, headers=None, post=None):
    "Helper function to get data from SiteDB or local cache"
    cname = url.split('/')[-1].replace('-', '_')
    conn = db_connection(self.dburi)
    col = conn[self.name][cname]
    local = find_one(col, {'expire': {'$gt': expire_timestamp(time.time())}})
    data = None
    if local:
        msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
        self.logger.info(msg)
        try: # get data from local cache
            data = [r for r in col.find() if 'expire' not in r][0]
            del data['_id']
        except Exception as exc:
            print_exc(exc)
            data = {}
    if not data or not local:
        headers = {'Accept': 'application/json'}
        datastream, expire = getdata(url, params, headers, expire, post,
                self.error_expire, self.verbose, self.ckey, self.cert,
                system=self.name)
        try: # read data and write it to local cache
            data = json.load(datastream)
            datastream.close()
            col.remove()
            col.insert(data)
            col.insert({'expire': expire_timestamp(expire)})
        except Exception as exc:
            print_exc(exc)
    return data, expire
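# A minimal usage sketch for getdata_helper (an assumption, not from the
# original source): `svc` stands for any service instance configured with
# dburi, name, ckey and cert; the URL is a hypothetical SiteDB endpoint.
def example_getdata_helper(svc):
    "Fetch site names through the cache-aware helper"
    url = 'https://cmsweb.cern.ch/sitedb/data/prod/site-names'  # hypothetical
    data, expire = svc.getdata_helper(url, params={}, expire=600)
    print('data expires at %s' % expire)
    return data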
def lookup_query(self, rawtext):
    """
    Check the parser cache for a given rawtext query.
    Search is done with the qhash of this string.
    Returns a tuple (status, value) for the cases
    (PARSERCACHE_VALID, mongo_query) - valid query found
    (PARSERCACHE_INVALID, error) - error message for invalid query
    (PARSERCACHE_NOTFOUND, None) - not in the cache
    """
    result = find_one(self.col, {'qhash': genkey(rawtext)},
                      fields=['query', 'error'])
    if result and result['query']:
        if self.verbose:
            self.logger.debug("DASParserCache: found valid %s->%s"
                              % (rawtext, result['query']))
        query = decode_mongo_query(result['query'])
        return (PARSERCACHE_VALID, query)
    elif result and result['error']:
        if self.verbose:
            self.logger.debug("DASParserCache: found invalid %s->%s"
                              % (rawtext, result['error']))
        return (PARSERCACHE_INVALID, result['error'])
    else:
        if self.verbose:
            self.logger.debug("DASParserCache: not found %s" % rawtext)
        return (PARSERCACHE_NOTFOUND, None)
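# Usage sketch, assuming `cache` is a DASParserCache instance and the
# PARSERCACHE_* constants are imported from the same module; the rawtext
# query below is illustrative only.
def example_lookup(cache, rawtext='file dataset=/a/b/c'):
    "Dispatch on the (status, value) tuple returned by lookup_query"
    status, value = cache.lookup_query(rawtext)
    if status == PARSERCACHE_VALID:
        return value             # decoded mongo query, ready to use
    if status == PARSERCACHE_INVALID:
        raise ValueError(value)  # cached error message for a bad query
    return None                  # PARSERCACHE_NOTFOUND: parse from scratch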
def find(self, dasquery):
    """
    Find provided query in DAS cache.
    """
    cond = {'qhash': dasquery.qhash, 'das.system': 'das',
            'das.expire': {'$gt': time.time()}}
    return find_one(self.col, cond)
def isexpired(self):
    """
    Check if data is expired in DB.
    """
    spec = {'ts': {'$lt': time.time() + self.expire}}
    if self.coll and find_one(self.coll, spec):
        return False
    return True
def das_presentation_map(self):
    "Read DAS presentation map"
    spec = {'type': 'presentation'}
    data = find_one(self.col, spec)
    if data:
        for _, uilist in data.get('presentation', {}).items():
            for row in uilist:
                if 'link' in row:
                    yield row
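# Usage sketch: das_presentation_map is a generator that yields only the
# presentation rows carrying a 'link' entry, so consumers can iterate it
# lazily; `mgr` is assumed to be a mapping object with an initialized col.
def example_presentation_links(mgr):
    "Collect all link definitions from the presentation map"
    return [row['link'] for row in mgr.das_presentation_map()]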
def add_mapreduce(self, name, fmap, freduce):
    """
    Add mapreduce record and assign it to given name.
    """
    print("Add %s map/reduce function" % name)
    exists = find_one(self.mapreduce, {'name': name})
    if exists:
        raise Exception('Map/reduce functions for %s already exist' % name)
    self.mapreduce.insert(dict(name=name, map=fmap, reduce=freduce))
    create_indexes(self.mapreduce, [('name', DESCENDING)])
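# Usage sketch (hypothetical names): map/reduce bodies are stored as plain
# JavaScript source strings and only compiled with bson.code.Code at
# execution time, see _map_reduce below.
def example_register_mapreduce(mgr):
    "Register a dataset-counting map/reduce pair under a given name"
    fmap = "function() { emit(this.dataset.name, 1); }"
    freduce = "function(key, values) { return Array.sum(values); }"
    mgr.add_mapreduce('dataset_count', fmap, freduce)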
def primary_key(self, das_system, urn):
    """
    Return DAS primary key for provided system and urn. The DAS primary
    key is the first entry in the *lookup* attribute of a DAS API record.
    """
    spec = {"system": das_system, "urn": urn}
    record = find_one(self.col, spec)
    if not record:
        return None
    pkey = record["lookup"]
    if pkey.find(",") != -1:
        pkey = pkey.split(",")[0]
    return pkey
def update(self):
    """
    Update DBS collection with a fresh copy of datasets. Upon the first
    insert of datasets we add a dataset:__POPULATED__ record, used as a
    flag that this cache has been populated.
    """
    if SKIP_UPDATES:
        return None
    dbc = self.col
    if not dbc:
        print("%s DBSDaemon %s, no connection to DB"
              % (dastimestamp(), self.dbcoll))
        return
    try:
        time0 = round(time.time())
        udict = {'$set': {'ts': time0}}
        cdict = {'dataset': '__POPULATED__'}
        gen = self.datasets()
        msg = ''
        if not dbc.count():
            try: # perform bulk insert operation
                dbc.insert_many(gen)
            except InvalidOperation as err:
                # we must inspect the error message to distinguish a real
                # InvalidOperation from generator exhaustion
                if str(err) == 'cannot do an empty bulk insert':
                    dbc.insert(cdict)
            except Exception:
                pass
            # remove records with old ts
            spec = {'ts': {'$lt': time0 - self.expire}}
            dbc.delete_many(spec)
            msg = 'inserted'
        else: # we already have records, update their ts
            for row in gen:
                spec = dict(dataset=row['dataset'])
                dbc.update(spec, udict, upsert=True)
            msg = 'updated'
        if find_one(dbc, cdict):
            dbc.update(cdict, udict)
        print("%s DBSDaemon %s, %s %s records in %s sec"
              % (dastimestamp(), self.dbcoll, msg, dbc.count(),
                 round(time.time() - time0)))
    except Exception as exc:
        print("%s DBSDaemon %s, failed to update, reason %s"
              % (dastimestamp(), self.dbcoll, str(exc)))
def primary_mapkey(self, das_system, urn):
    """
    Return DAS primary map key for provided system and urn. For example,
    the file DAS key is mapped to file.name, so this API will return
    file.name
    """
    spec = {'system': das_system, 'urn': urn}
    record = find_one(self.col, spec)
    mapkey = []
    for row in record['das_map']:
        lkey = record['lookup']
        if lkey.find(',') != -1:
            lkey = lkey.split(',')[0]
        if row['das_key'] == lkey:
            return row['rec_key']
    return mapkey
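# Usage sketch tying primary_key and primary_mapkey together: for an API
# record whose lookup is 'file,run' and whose das_map maps the 'file'
# das_key to 'file.name', the calls below would return 'file' and
# 'file.name'. The system/urn values are hypothetical.
def example_primary_keys(mgr):
    "Resolve the DAS primary key and its record key for one API"
    pkey = mgr.primary_key('dbs3', 'file4dataset')       # e.g. 'file'
    mapkey = mgr.primary_mapkey('dbs3', 'file4dataset')  # e.g. 'file.name'
    return pkey, mapkey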
def _map_reduce(self, coll, mapreduce, spec=None):
    """
    Perform map/reduce operation over DAS cache using provided
    collection, mapreduce name and optional conditions.
    """
    self.logger.debug("(%s, %s)" % (mapreduce, spec))
    record = find_one(self.mrcol, {'name': mapreduce})
    if not record:
        raise Exception("Map/reduce function '%s' not found" % mapreduce)
    fmap = record['map']
    freduce = record['reduce']
    if spec:
        result = coll.map_reduce(Code(fmap), Code(freduce), query=spec)
    else:
        result = coll.map_reduce(Code(fmap), Code(freduce))
    msg = "found %s records in %s" % (result.count(), result.name)
    self.logger.info(msg)
    self.logger.debug(fmap)
    self.logger.debug(freduce)
    return result
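# Usage sketch, assuming a 'dataset_count' function was registered via
# add_mapreduce above; `cache` is the DAS cache object and `coll` a pymongo
# collection. The optional spec restricts map/reduce to matching records.
def example_run_mapreduce(cache, coll):
    "Run a named map/reduce over non-expired records only"
    spec = {'das.expire': {'$gt': time.time()}}
    result = cache._map_reduce(coll, 'dataset_count', spec=spec)
    return list(result.find())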
def init_presentationcache(self):
    """
    Initialize presentation cache by reading presentation map.
    """
    spec = {'type': 'presentation'}
    data = find_one(self.col, spec)
    if data:
        self.presentationcache = data['presentation']
        for daskey, uilist in self.presentationcache.items():
            for row in uilist:
                link = None
                if 'link' in row:
                    link = row['link']
                if 'diff' in row:
                    self.diffkeycache[daskey] = row['diff']
                tdict = {daskey: {'mapkey': row['das'], 'link': link}}
                if row['ui'] in self.reverse_presentation:
                    self.reverse_presentation[row['ui']].update(tdict)
                else:
                    self.reverse_presentation[row['ui']] = \
                        {daskey: {'mapkey': row['das'], 'link': link}}
def check_filters(self, collection, spec, fields):
    "Check that given filters can be applied to records found with spec"
    if not fields:
        return
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    mdb.add_son_manipulator(self.das_son_manipulator)
    col = mdb[collection]
    data = find_one(col, spec)
    if not data:
        return
    found = False
    for fltr in fields:
        row = dict(data)
        if fltr in row or 'error' in row:
            found = True
            break
        for key in fltr.split('.'):
            if isinstance(row, dict):
                if key in row:
                    row = row[key]
                    found = True
                else:
                    found = False
            elif isinstance(row, list):
                for row in list(row):
                    if key in row:
                        row = row[key]
                        found = True
                        break
            else:
                found = False
    if not found:
        err = "check_filters unable to find filter=%s" % fltr
        err += "\nrecord=%s" % data
        raise Exception(err)
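# Usage sketch: check_filters raises an Exception when none of the dotted
# filter paths can be resolved inside the record matched by spec; the
# collection name, qhash and filter fields below are hypothetical.
def example_check_filters(cache):
    "Validate that grep-style filters match the stored record structure"
    spec = {'qhash': 'abc123'}
    fields = ['file.name', 'file.size']
    cache.check_filters('merge', spec, fields)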
def das_record(self, dasquery):
    "Retrieve DAS record for given query"
    cond = {'qhash': dasquery.qhash, 'das.expire': {'$gt': time.time()}}
    return find_one(self.col, cond)