def test_db_connection(self):
    """Test db_connection"""
    result = db_connection(self.dburi)
    expect = result.instance
    result = db_connection(self.dburi)
    self.assertEqual(expect, result.instance)
def dascore_monitor(cdict, func, sleep=5):
    """
    Check status of DASCore and MongoDB connection for the das/uri
    parameters provided in cdict. Invoke the provided function upon
    successful connection.
    """
    uri = cdict['uri']
    das = cdict['das']
    conn = db_connection(uri)
    while True:
        time.sleep(sleep)
        if not conn or not das:
            conn = db_connection(uri)
            try:
                if conn['mapping']['db'].count():
                    time.sleep(3) # sleep to ensure that all maps are loaded
                    func() # re-initialize DAS
                    das = True # we do see maps in MongoDB
                else:
                    das = False
            except:
                das = False
            if conn:
                print("### dascore_monitor, re-established connection %s, "
                      "mapping.db records %s" % (conn, das))
            else:
                print("### dascore_monitor, lost connection")
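# A minimal usage sketch (not part of the original code): dascore_monitor is
# meant to run in a background daemon thread so it can keep probing the
# mapping DB while the main process serves requests. The helper name
# `start_dascore_monitor` and the `reinit_das` callback are hypothetical.
import threading

def start_dascore_monitor(uri, reinit_das, sleep=5):
    "Launch dascore_monitor (defined above) in a daemon thread."
    cdict = {'uri': uri, 'das': False}
    thread = threading.Thread(target=dascore_monitor,
                              args=(cdict, reinit_das, sleep))
    thread.daemon = True
    thread.start()
    return thread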
def conn_monitor(uri, func, sleep=5):
    """
    Daemon which ensures MongoDB connection
    """
    conn = db_connection(uri)
    while True:
        time.sleep(sleep)
        if not conn:
            conn = db_connection(uri)
            print("\n### re-establish connection to %s" % conn)
            try:
                if conn:
                    func() # re-initialize DB connection
            except Exception as err:
                print_exc(err)
def db_monitor(uri, func, sleep=5):
    """
    Check status of MongoDB connection. Invoke the provided function
    upon successful connection.
    """
    conn = db_connection(uri)
    while True:
        if not conn or not is_db_alive(uri):
            try:
                conn = db_connection(uri)
                func()
                print("\n### re-established connection %s" % conn)
            except:
                pass
        time.sleep(sleep)
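# A minimal sketch of the kind of callback db_monitor expects (the class name
# `WorkerWithCache` and its attributes are assumptions for illustration): the
# callback simply re-creates collection handles once the connection is back.
class WorkerWithCache(object):
    "Hypothetical consumer that keeps a MongoDB collection handle."
    def __init__(self, dburi, dbname, colname):
        self.dburi = dburi
        self.dbname = dbname
        self.colname = colname
        self.col = None
        self.init_db()

    def init_db(self):
        "Re-establish the collection handle; passed to db_monitor as `func`."
        conn = db_connection(self.dburi)
        self.col = conn[self.dbname][self.colname]

# worker = WorkerWithCache(dburi, 'das', 'cache')   # hypothetical values
# db_monitor(dburi, worker.init_db, sleep=5)        # blocks; run in a thread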
def create_db(self):
    """
    Establish connection to MongoDB back-end and create DB.
    """
    self.conn = db_connection(self.dburi)
    self.db = self.conn[self.dbname]
    self.col = self.db[self.colname]
def map_reduce(self, mr_input, dasquery, collection='merge'):
    """
    Wrapper around _map_reduce to allow sequential map/reduce
    operations, e.g. map/reduce out of map/reduce.

    mr_input is either an alias name or a list of alias names for
    map/reduce functions. The input dasquery is applied to the first
    iteration of map/reduce functions.
    """
    # NOTE: I need to revisit mapreduce.
    spec = dasquery.mongo_query['spec']
    if not isinstance(mr_input, list):
        mrlist = [mr_input]
    else:
        mrlist = mr_input
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    mdb.add_son_manipulator(self.das_son_manipulator)
    coll = mdb[collection]
    for mapreduce in mrlist:
        if mapreduce == mrlist[0]:
            cond = spec
        else:
            cond = None
        coll = self._map_reduce(coll, mapreduce, cond)
    for row in coll.find():
        yield row
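# A usage sketch, not from the source: `cache` stands for the object that owns
# the map_reduce method above, and the alias names are hypothetical. Passing a
# list chains the steps: the output collection of one map/reduce feeds the
# next, and only the first step is constrained by the query spec.
def run_chained_mapreduce(cache, dasquery):
    "Hypothetical driver for sequential map/reduce steps."
    aliases = ['dataset_size', 'total_by_site'] # hypothetical alias names
    for row in cache.map_reduce(aliases, dasquery):
        yield row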
def getdata_helper(self, url, params, expire, headers=None, post=None):
    "Helper function to get data from SiteDB or local cache"
    cname = url.split('/')[-1].replace('-', '_')
    conn = db_connection(self.dburi)
    col = conn[self.name][cname]
    local = find_one(col, {'expire': {'$gt': expire_timestamp(time.time())}})
    data = None
    if local:
        msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
        self.logger.info(msg)
        try: # get data from local cache
            data = [r for r in col.find() if 'expire' not in r][0]
            del data['_id']
        except Exception as exc:
            print_exc(exc)
            data = {}
    if not data or not local:
        headers = {'Accept': 'application/json'}
        datastream, expire = getdata(url, params, headers, expire, post,
                                     self.error_expire, self.verbose,
                                     self.ckey, self.cert, system=self.name)
        try: # read data and write it to local cache
            data = json.load(datastream)
            datastream.close()
            col.remove()
            col.insert(data)
            col.insert({'expire': expire_timestamp(expire)})
        except Exception as exc:
            print_exc(exc)
    return data, expire
def datasets(inst='cms_dbs_prod_global'):
    "Provide list of datasets"
    dasconfig = das_readconfig()
    conn = db_connection(dasconfig['mongodb']['dburi'])
    coll = conn['dbs'][inst]
    for row in coll.find():
        yield row['dataset']
def incache(self, dasquery, collection='merge', system=None, api=None,
            query_record=False):
    """
    Check if we have query results in cache, otherwise return null.
    Please note that the input query is a MongoDB query; consult the
    MongoDB API for more details, http://api.mongodb.org/python/
    """
    if query_record:
        record = record_codes('query_record')
    else:
        record = spec4data_records()
    spec = {'qhash': dasquery.qhash, 'das.record': record,
            'das.expire': {'$gt': time.time()}}
    if system:
        spec.update({'das.system': system})
    if api:
        spec.update({'das.api': api})
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    mdb.add_son_manipulator(self.das_son_manipulator)
    col = mdb[collection]
    res = col.find(spec, **PYMONGO_OPTS).count()
    msg = "(%s, coll=%s) found %s results" % (dasquery, collection, res)
    self.logger.info(msg)
    if res:
        return True
    return False
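# A usage sketch (the `fetch_and_store` callback is hypothetical): incache lets
# callers skip a remote call when non-expired records for the query hash are
# already present in the merge collection.
def fetch_if_missing(cache, dasquery, fetch_and_store):
    "Invoke the fetch callback only when the DAS cache has no live records."
    if not cache.incache(dasquery, collection='merge'):
        fetch_and_store(dasquery)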
def collection(uri):
    """
    Return collection cursor
    """
    conn = db_connection(uri)
    coll = conn['db']['datasets']
    return coll
def delete_db(self):
    """
    Delete mapping DB in MongoDB back-end.
    """
    conn = db_connection(self.dburi)
    if conn:
        conn.drop_database(self.dbname)
def db_monitor(uri, func, sleep=5, max_retries=None):
    """
    Check status of MongoDB connection. Invoke the provided function
    upon successful connection.
    """
    conn = None
    retries = 0
    while True:
        # db is dead
        if not (conn and is_db_alive(uri)):
            try:
                conn = db_connection(uri)
            except Exception as exc:
                print(exc)
            if conn and is_db_alive(uri):
                print("### established connection %s" % conn)
                func()
        # limit the number of retries if needed
        retries += 1
        if max_retries is not None and retries > max_retries:
            break
        time.sleep(sleep)
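# A usage sketch (the import of threading and the `reinit` callback are
# assumptions): with max_retries set, this db_monitor variant gives up after a
# bounded number of probes instead of looping forever, which is convenient in
# tests or short-lived tools.
import threading

def start_bounded_monitor(uri, reinit, sleep=1, max_retries=10):
    "Run the bounded db_monitor variant above in a daemon thread."
    thread = threading.Thread(target=db_monitor,
                              args=(uri, reinit, sleep, max_retries))
    thread.daemon = True
    thread.start()
    return thread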
def init(self):
    """
    Establish connection to MongoDB back-end and create DB.
    """
    col = None
    try:
        conn = db_connection(self.dburi)
        if conn:
            dbc = conn[self.dbname]
            col = dbc[self.colname]
        # print "### DASMapping:init started successfully"
    except ConnectionFailure as _err:
        tstamp = dastimestamp("")
        thread = threading.current_thread()
        print("### MongoDB connection failure thread=%s, id=%s, time=%s"
              % (thread.name, thread.ident, tstamp))
    except Exception as exc:
        print_exc(exc)
    if col:
        index = [("type", DESCENDING),
                 ("system", DESCENDING),
                 ("urn", DESCENDING),
                 ("das_map.das_key", DESCENDING),
                 ("das_map.rec_key", DESCENDING),
                 ("das_map.api_arg", DESCENDING)]
        create_indexes(col, index)
def get_records(self, coll, spec, fields, skeys, idx, limit, unique=False):
    "Generator to get records from MongoDB."
    try:
        conn = db_connection(self.dburi)
        mdb = conn[self.dbname]
        mdb.add_son_manipulator(self.das_son_manipulator)
        col = mdb[coll]
        nres = col.find(spec, exhaust=True).count()
        if nres == 1 or nres <= limit:
            limit = 0
        if limit:
            res = col.find(spec=spec, fields=fields, sort=skeys,
                           skip=idx, limit=limit)
        else:
            res = col.find(spec=spec, fields=fields, sort=skeys,
                           exhaust=True)
        if unique:
            res = unique_filter(res)
        for row in res:
            yield row
    except Exception as exp:
        print_exc(exp)
        row = {'exception': str(exp)}
        res = []
        yield row
def __init__(self, dburi, dbname='das', dbcoll='requests', lifetime=86400):
    self.con = db_connection(dburi)
    self.col = self.con[dbname][dbcoll]
    self.hold = self.con[dbname][dbcoll + '_onhold']
    create_indexes(self.col, [('ts', ASCENDING)])
    create_indexes(self.hold, [('ts', ASCENDING)])
    self.lifetime = lifetime # default 1 day (86400 seconds)
def delete_db_collection(self):
    """
    Delete mapping DB collection in MongoDB.
    """
    conn = db_connection(self.dburi)
    if conn:
        dbc = conn[self.dbname]
        dbc.drop_collection(self.colname)
def init(self):
    "Initialize connection to MongoDB"
    conn = db_connection(self.dburi)
    if conn:
        database = conn[self.dbname]
        if self.dbcoll not in database.collection_names():
            database.create_collection(self.dbcoll,
                                       capped=True, size=self.dbsize)
def create_db(self):
    """
    Create db collection
    """
    conn = db_connection(self.dburi)
    dbn = conn[self.dbname]
    if self.colname not in dbn.collection_names():
        dbn.create_collection(self.colname, capped=True, size=self.sizecap)
    self.col = dbn[self.colname]
def cleanup_worker(dburi, dbname, collections, sleep):
    """DAS cache cleanup worker"""
    while True:
        conn = db_connection(dburi)
        spec = {'das.expire': {'$lt': time.time()}}
        for col in collections:
            with threading.Lock():
                conn[dbname][col].remove(spec)
        time.sleep(sleep)
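# A usage sketch (the default collection names and sleep interval below are
# assumptions): the cleanup worker loops forever, so it is typically launched
# in its own daemon thread alongside the server process.
import threading

def start_cleanup_worker(dburi, dbname='das', collections=('cache', 'merge'),
                         sleep=600):
    "Launch cleanup_worker (defined above) in a daemon thread."
    thread = threading.Thread(target=cleanup_worker,
                              args=(dburi, dbname, list(collections), sleep))
    thread.daemon = True
    thread.start()
    return thread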
def create_db(self):
    """
    Create analytics DB in MongoDB back-end.
    """
    self.conn = db_connection(self.dburi)
    database = self.conn[self.dbname]
    das_son_manipulator = DAS_SONManipulator()
    database.add_son_manipulator(das_son_manipulator)
    self.col = database[self.colname]
def create_db(self):
    """
    Create db collection
    """
    conn = db_connection(self.dburi)
    dbn = conn[self.dbname]
    if self.colname not in dbn.collection_names():
        dbn.create_collection(self.colname, capped=True, size=self.sizecap)
    col = dbn[self.colname]
    index_list = [('qhash', DESCENDING)]
    create_indexes(col, index_list)
def init(self):
    """Takes care of MongoDB connection"""
    try:
        conn = db_connection(self.uri)
        self.coll = conn[self.dbname][self.collname]
        indexes = [('name', DESCENDING), ('site', DESCENDING),
                   ('timestamp', DESCENDING)]
        for index in indexes:
            create_indexes(self.coll, [index])
    except Exception as _exp:
        self.coll = None
def __init__(self, config):
    self.sleep = config.get('sleep', 5)
    pattern = {'das.system': 'dbs', 'das.primary_key': 'dataset.name'}
    self.pattern = config.get('query_pattern', pattern)
    nworkers = int(config.get('nworkers', 10))
    name = config.get('name', 'dataset_keeper')
    dasconfig = das_readconfig()
    debug = False
    self.dascore = DASCore(config=dasconfig, nores=True, debug=debug)
    self.taskmgr = TaskManager(nworkers=nworkers, name=name)
    self.conn = db_connection(dasconfig['mongodb']['dburi'])
def col(self):
    "col property provides access to DAS keylearning collection"
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    colnames = mdb.collection_names()
    if not colnames or self.colname not in colnames:
        try:
            mdb.create_collection(self.colname)
        except OperationFailure:
            pass
    mdb.add_son_manipulator(self.das_son_manipulator)
    return mdb[self.colname]
def merge(self):
    "merge property provides access to DAS merge collection"
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    colnames = mdb.collection_names()
    if not colnames or self.merge_ not in colnames:
        try:
            mdb.create_collection(self.merge_)
        except OperationFailure:
            pass
    mdb.add_son_manipulator(self.das_son_manipulator)
    return mdb[self.merge_]
def db_monitor(uri, func, sleep, reload_map, reload_time,
               check_maps, reload_time_bad_maps):
    """
    Check status of MongoDB connection and reload DAS maps once in a while.
    """
    time0 = time.time()
    valid_maps = False
    try:
        valid_maps = check_maps()
    except Exception as err:
        print_exc(err)
    while True:
        conn = db_connection(uri)
        if not conn or not is_db_alive(uri):
            try:
                conn = db_connection(uri, verbose=False)
                func()
                if conn:
                    print("### db_monitor re-established connection %s" % conn)
                    valid_maps = check_maps()
                else:
                    print("### db_monitor, lost connection")
            except Exception as err:
                print_exc(err)
        if conn:
            # reload invalid maps more quickly
            reload_interval = reload_time if valid_maps else reload_time_bad_maps
            if time.time() - time0 > reload_interval:
                map_state = "INVALID" if not valid_maps else ""
                msg = "reload %s DAS maps %s" % (map_state, reload_map)
                print(dastimestamp(), msg)
                try:
                    reload_map()
                    valid_maps = check_maps()
                except Exception as err:
                    print_exc(err)
                time0 = time.time()
        time.sleep(sleep)
def init(self):
    """
    Init db connection and check that it is alive
    """
    try:
        conn = db_connection(self.dburi)
        self.col = conn[self.dbname][self.dbcoll]
        indexes = [('dataset', ASCENDING), ('ts', ASCENDING)]
        create_indexes(self.col, indexes)
        self.col.remove()
    except Exception as _exp:
        self.col = None
    if not is_db_alive(self.dburi):
        self.col = None
def get_dataset_hashes(self, dasquery):
    "Get dataset hashes from DBS database"
    spec = dasquery.mongo_query.get('spec', {})
    inst = dasquery.instance
    conn = db_connection(self.dburi)
    if spec and inst:
        dataset = spec.get('dataset.name', None)
        if dataset:
            if dataset.find('*') != -1:
                cond = {'dataset': re.compile(dataset.replace('*', '.*'))}
            else:
                cond = {'dataset': dataset}
            for row in conn['dbs'][inst].find(cond):
                if 'qhash' in row:
                    yield row['qhash']
def das_mapreduces():
    """
    Return list of DAS mapreduce functions
    """
    mlist = []
    config = das_readconfig()
    dburi = config['mongodb']['dburi']
    dbname = config['dasdb']['dbname']
    colname = config['dasdb']['mrcollection']
    conn = db_connection(dburi)
    coll = conn[dbname][colname]
    for row in coll.find({}):
        if set(row.keys()) == set(['map', 'reduce', 'name', '_id']):
            mlist.append(row['name'])
    return mlist
def __init__(self, config):
    DASAbstractService.__init__(self, 'cmsswconfigs', config)
    self.headers = {'Accept': 'text/json;application/json'}
    self.map = self.dasmapping.servicemap(self.name)
    map_validator(self.map)
    # specify access to DB
    dburi = config.get('dburi')
    self.conn = db_connection(dburi)
    database = self.conn['configdb']
    self.managers = {}
    for release in database.collection_names():
        if release.find('index') == -1:
            self.managers[release] = MongoQuery(release)
    self.releases = self.managers.keys()
def get_dataset_hashes(self, dasquery):
    "Get dataset hashes from DBS database"
    spec = dasquery.mongo_query.get('spec', {})
    inst = dasquery.instance
    conn = db_connection(self.dburi)
    if spec and inst:
        dataset = spec.get('dataset.name', None)
        if dataset:
            if dataset.find('*') != -1:
                cond = {'dataset': re.compile(dataset.replace('*', '.*'))}
            else:
                cond = {'dataset': dataset}
            for row in conn['dbs'][inst].find(cond):
                if 'qhash' in row:
                    yield row['qhash']
def __init__(self, config):
    DASAbstractService.__init__(self, 'cmsswconfigs', config)
    self.headers = {'Accept': 'text/json;application/json'}
    self.map = self.dasmapping.servicemap(self.name)
    map_validator(self.map)
    # specify access to DB
    dburi = config.get('dburi')
    self.conn = db_connection(dburi)
    database = self.conn['configdb']
    self.managers = {}
    for release in database.collection_names():
        if release.find('index') == -1:
            self.managers[release] = MongoQuery(release)
    self.releases = list(self.managers.keys())
def existing_indexes(self, collection='merge'):
    """
    Get list of existing indexes in DB. They are returned by the
    index_information API in the following form:

    .. doctest::

        {u'_id_': {u'key': [(u'_id', 1)], u'v': 0},
         u'das.expire_1': {u'key': [(u'das.expire', 1)], u'v': 0},
         ...
         u'tier.name_-1': {u'key': [(u'tier.name', -1)], u'v': 0}}
    """
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    mdb.add_son_manipulator(self.das_son_manipulator)
    col = mdb[collection]
    for val in col.index_information().values():
        for idx in val['key']:
            yield idx[0] # index name
def nresults(self, dasquery, collection='merge'):
    """Return number of results for given query."""
    if dasquery.aggregators:
        return len(dasquery.aggregators)
    # Distinguish two use cases: a unique filter and a general query.
    # In the first we should count only unique records; in the latter we
    # can rely on the DB count() method. Please keep in mind that the
    # usage of fields in find() doesn't affect counting, since it is a
    # view over records found with spec, so we don't need to use it.
    fields, filter_cond = self.get_fields(dasquery)
    if not fields:
        spec = dasquery.mongo_query.get('spec', {})
    elif dasquery.hashes:
        spec = {'qhash': {'$in': dasquery.hashes},
                'das.record': spec4data_records()}
    else:
        spec = {'qhash': dasquery.qhash,
                'das.record': spec4data_records()}
    if filter_cond:
        spec.update(filter_cond)
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    mdb.add_son_manipulator(self.das_son_manipulator)
    col = mdb[collection]
    if dasquery.unique_filter:
        skeys = self.mongo_sort_keys(collection, dasquery)
        if skeys:
            gen = col.find(spec, **PYMONGO_OPTS).sort(skeys)
        else:
            gen = col.find(spec, **PYMONGO_OPTS)
        res = len([r for r in unique_filter(gen)])
    else:
        res = col.find(spec, **PYMONGO_OPTS).count()
    if not res: # double check that this is really the case
        time.sleep(1)
        res = col.find(spec, **PYMONGO_OPTS).count()
    msg = "%s" % res
    self.logger.info(msg)
    return res
def cleanup(dasconfig, verbose=False):
    """DAS cache cleanup worker"""
    config = dasconfig['mongodb']
    dburi = config['dburi']
    dbname = config['dbname']
    cols = config['collections']
    del_ttl = dasconfig['dasdb']['cleanup_delta_ttl']
    conn = db_connection(dburi)
    spec = {'das.expire': {'$lt': time.time() - del_ttl}}
    msgs = []
    for col in cols:
        if verbose:
            ndocs = conn[dbname][col].find(spec).count()
            msgs.append('%s.%s %s docs' % (dbname, col, ndocs))
        if pymongo.version.startswith('3.'): # pymongo 3.X
            conn[dbname][col].delete_many(spec)
        else:
            conn[dbname][col].remove(spec)
    if verbose:
        tstamp = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(time.time()))
        print('%s %s %s' % (tstamp, json.dumps(spec), ' '.join(msgs)))
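# A usage sketch (the `cleanup_daemon` driver and its interval are
# assumptions): unlike cleanup_worker above, cleanup() is a single pass over
# the configured collections, so a periodic caller simply re-invokes it.
import time

def cleanup_daemon(dasconfig, interval=3600, verbose=False):
    "Hypothetical periodic driver around the one-shot cleanup() above."
    while True:
        cleanup(dasconfig, verbose=verbose)
        time.sleep(interval)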
def get_records(self, coll, spec, fields, skeys, idx, limit, unique=False):
    "Generator to get records from MongoDB."
    try:
        conn = db_connection(self.dburi)
        mdb = conn[self.dbname]
        mdb.add_son_manipulator(self.das_son_manipulator)
        col = mdb[coll]
        nres = col.find(spec, **PYMONGO_OPTS).count()
        if nres == 1 or nres <= limit:
            limit = 0
        if limit:
            res = col.find(spec, fields, sort=skeys, skip=idx, limit=limit)
        else:
            res = col.find(spec, fields, sort=skeys, **PYMONGO_OPTS)
        if unique:
            res = unique_filter(res)
        for row in res:
            yield row
    except Exception as exp:
        print_exc(exp)
        row = {'exception': str(exp)}
        res = []
        yield row
def incache(self, dasquery, collection='merge', system=None, api=None,
            query_record=False):
    """
    Check if we have query results in cache, otherwise return null.
    Please note that the input query is a MongoDB query; consult the
    MongoDB API for more details, http://api.mongodb.org/python/
    """
    if query_record:
        record = record_codes('query_record')
    else:
        record = spec4data_records()
    spec = {'qhash': dasquery.qhash, 'das.record': record,
            'das.expire': {'$gt': time.time()}}
    if system:
        spec.update({'das.system': system})
    if api:
        spec.update({'das.api': api})
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    mdb.add_son_manipulator(self.das_son_manipulator)
    col = mdb[collection]
    res = col.find(spec, **PYMONGO_OPTS).count()
    msg = "(%s, coll=%s) found %s results" % (dasquery, collection, res)
    self.logger.info(msg)
    if res:
        return True
    return False
def remove_expired(self, dasquery, collection):
    """
    Remove expired records from DAS cache. We need to perform this
    operation very carefully since we don't use transactions and
    on-going commits can invoke this method (see das_core.py).
    Therefore we use the MongoDB $or operator to wipe out queries
    which match the DASQuery hash and have already expired, or queries
    which lived in the cache for more than the rec_ttl config parameter.
    The latter operation just prevents the DAS cache from growing.
    """
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    mdb.add_son_manipulator(self.das_son_manipulator)
    col = mdb[collection]
    # use an additional delta to check data record expiration; we add this
    # delta to ensure that there are no records close to the current
    # timestamp which may expire during request processing
    spec = {'qhash': dasquery.qhash,
            'das.expire': {'$lt': time.time() + self.del_ttl}}
    col.delete_many(spec)
def col(self):
    "Collection object to MongoDB"
    conn = db_connection(self.dburi)
    dbn = conn[self.dbname]
    col = dbn[self.colname]
    return col
def mrcol(self):
    "mrcol property provides access to DAS map-reduce collection"
    conn = db_connection(self.dburi)
    mdb = conn[self.dbname]
    mdb.add_son_manipulator(self.das_son_manipulator)
    return mdb[self.mrcol_]
def col(self):
    """Return MongoDB collection object"""
    conn = db_connection(self.dburi)
    col = conn[self.dbname][self.dbcoll]
    return col
def col(self):
    "Return MongoDB collection object"
    conn = db_connection(self.dburi)
    dbc = conn[self.dbname]
    col = dbc[self.dbcoll]
    return col