def init(self): """Init DAS web server, connect to DAS Core""" try: self.logcol = DASLogdb(self.dasconfig) self.reqmgr = RequestManager(self.dburi, lifetime=self.lifetime) self.dasmgr = DASCore(engine=self.engine) self.repmgr = CMSRepresentation(self.dasconfig, self.dasmgr) self.daskeys = self.dasmgr.das_keys() self.gfs = db_gridfs(self.dburi) self.daskeys.sort() self.dasmapping = self.dasmgr.mapping self.dasmapping.init_presentationcache() self.colors = {} for system in self.dasmgr.systems: self.colors[system] = gen_color(system) self.sitedbmgr = SiteDBService(self.dasconfig) except Exception as exc: print_exc(exc) self.dasmgr = None self.daskeys = [] self.colors = {} return # Start Onhold_request daemon if self.dasconfig['web_server'].get('onhold_daemon', False): self.process_requests_onhold()
def get_status(self, dasquery):
    """
    Look-up status of provided query in a cache.
    Return status of the query request and its hash.
    """
    status = None
    error = None
    reason = None
    if dasquery and "fields" in dasquery.mongo_query:
        fields = dasquery.mongo_query["fields"]
        if fields and isinstance(fields, list) and "queries" in fields:
            return "ok", error, reason
    record = self.rawcache.find(dasquery)
    error, reason = self.rawcache.is_error_in_records(dasquery)
    try:
        if record and "das" in record and "status" in record["das"]:
            status = record["das"]["status"]
            if not error:
                error = record["das"].get("error", error)
            if not reason:
                reason = record["das"].get("reason", reason)
            return status, error, reason
    except Exception as exc:
        print_exc(exc)
        status = error = reason = None
        self.rawcache.remove_from_cache(dasquery)
    return status, error, reason
def quote(data):
    """
    Sanitize the data using cgi.escape.
    """
    if isinstance(data, (int, long, float)):
        res = data
    elif isinstance(data, (dict, list)):
        res = data
    elif isinstance(data, ObjectId):
        res = str(data)
    else:
        try:
            if data:
                res = cgi.escape(data, quote=True)
            else:
                res = ""
        except Exception as exc:
            print_exc(exc)
            print("Unable to cgi.escape(%s, quote=True)" % data)
            res = ""
    return res
def get_status(self, dasquery):
    """
    Look-up status of provided query in a cache.
    Return status of the query request and its hash.
    """
    status = None
    error = None
    reason = None
    for col in ['merge', 'cache']:
        self.rawcache.remove_expired(dasquery, col)
    if dasquery and 'fields' in dasquery.mongo_query:
        fields = dasquery.mongo_query['fields']
        if fields and isinstance(fields, list) and 'queries' in fields:
            return 'ok', error, reason
    record = self.rawcache.find(dasquery)
    error, reason = self.rawcache.is_error_in_records(dasquery)
    try:
        if record and 'das' in record and 'status' in record['das']:
            status = record['das']['status']
            if not error:
                error = record['das'].get('error', error)
            if not reason:
                reason = record['das'].get('reason', reason)
            return status, error, reason
    except Exception as exc:
        print_exc(exc)
        status = error = reason = None
        self.rawcache.remove_from_cache(dasquery)
    return status, error, reason
def apicall(self, dasquery, url, api, args, dformat, expire):
    """
    Data service api method, can be defined by data-service class.
    It parses the input query and invokes the appropriate data-service
    API call. All results are stored into the DAS cache along with the
    api call inserted into Analytics DB.

    We invoke an explicit close call for our datastream instead of
    using a context manager, since this method as well as getdata/parser
    can be overwritten by child classes.
    """
    datastream = None
    try:
        args = self.inspect_params(api, args)
        time0 = time.time()
        headers = make_headers(dformat)
        datastream, expire = self.getdata(url, args, expire, headers)
        self.logger.info("%s expire %s" % (api, expire))
        rawrows = self.parser(dasquery, dformat, datastream, api)
        dasrows = self.translator(api, rawrows)
        ctime = time.time() - time0
        self.write_to_cache(dasquery, expire, url, api, args, dasrows, ctime)
    except Exception as exc:
        msg = 'Fail to process: url=%s, api=%s, args=%s' \
                % (url, api, args)
        print(msg)
        print_exc(exc)
    close(datastream)
def create_indexes(coll, index_list):
    """
    Create indexes for provided collection/index_list and ensure
    that they are in place
    """
    index_info = coll.index_information().values()
    for pair in index_list:
        index_exists = 0
        for item in index_info:
            if item['key'] == [pair]:
                index_exists = 1
        if not index_exists:
            try:
                if isinstance(pair, list):
                    coll.create_index(pair)
                else:
                    coll.create_index([pair])
            except Exception as exp:
                print_exc(exp)
        try:
            spec = pair
            if not isinstance(pair, list):
                spec = [pair]
            coll.create_index(spec)
        except Exception as exp:
            print_exc(exp)
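# A minimal usage sketch for create_indexes(), assuming a local MongoDB
# instance reachable via pymongo; the database/collection names and index
# fields below are illustrative, not taken from the DAS configuration.
from pymongo import MongoClient, DESCENDING

client = MongoClient('mongodb://localhost:27017')
cache = client['das']['cache']
# each entry is either a single (field, direction) pair or a list of pairs
create_indexes(cache, [('das.expire', DESCENDING), ('qhash', DESCENDING)])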
def check_pid(self, pid, ahash):
    """
    Check status of given pid and return appropriate page content.
    This is a server callback function for ajaxCheckPid, see
    js/ajax_utils.js
    """
    cherrypy.response.headers['Cache-Control'] = 'no-cache'
    cherrypy.response.headers['Pragma'] = 'no-cache'
    img = '<img src="%s/images/loading.gif" alt="loading"/>' % self.base
    page = ''
    try:
        if self.taskmgr.is_alive(pid):
            page = img + " processing PID=%s" % pid
        else:
            kwargs = self.reqmgr.get(pid)
            if kwargs and 'dasquery' in kwargs:
                del kwargs['dasquery']
            # if no kwargs (another request deleted it)
            # use logging DB to look-up user request via ahash
            if not kwargs:
                spec = {'ahash': ahash}
                skey = [('ts', DESCENDING)]
                res = [r for r in self.logcol.find(spec).sort(skey)]
                kwargs = res[0]['args']
                self.adjust_input(kwargs)
            self.reqmgr.remove(pid)
            page = self.get_page_content(kwargs)
    except Exception as err:
        msg = 'check_pid fails for pid=%s' % pid
        print(dastimestamp('DAS WEB ERROR '), msg)
        print_exc(err)
        self.reqmgr.remove(pid)
        self.taskmgr.remove(pid)
        return self.error(gen_error_msg({'pid': pid}), wrap=False)
    return page
def onhold_worker(dasmgr, taskmgr, reqmgr, limit):
    "Worker daemon to process onhold requests"
    if not dasmgr or not taskmgr or not reqmgr:
        return
    print("### START onhold_worker", time.time())
    jobs = []
    while True:
        try:
            while jobs:
                try:
                    reqmgr.remove(jobs.pop(0))
                except Exception:
                    break
            nrequests = reqmgr.size()
            for rec in reqmgr.items_onhold():
                dasquery = DASQuery(rec['uinput'])
                addr = rec['ip']
                kwargs = {'input': rec['uinput']}
                if (nrequests - taskmgr.nworkers()) < limit:
                    _evt, pid = taskmgr.spawn(\
                            dasmgr.call, dasquery, \
                            addr, pid=dasquery.qhash)
                    jobs.append(pid)
                    reqmgr.remove_onhold(str(rec['_id']))
        except AutoReconnect:
            pass
        except Exception as err:
            print_exc(err)
        time.sleep(5)
    print("### END onhold_worker", time.time())
def getdata_helper(self, url, params, expire, headers=None, post=None):
    "Helper function to get data from SiteDB or local cache"
    cname = url.split('/')[-1].replace('-', '_')
    col = self.localcache.conn[self.name][cname]
    local = col.find_one({'expire': {'$gt': expire_timestamp(time.time())}})
    data = None
    if local:
        msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
        self.logger.info(msg)
        try:  # get data from local cache
            data = [r for r in col.find() if 'expire' not in r][0]
            del data['_id']
        except Exception as exc:
            print_exc(exc)
            data = {}
    if not data or not local:
        headers = {'Accept': 'application/json'}
        datastream, expire = getdata(\
                url, params, headers, expire, post,
                self.error_expire, self.verbose, self.ckey, self.cert,
                system=self.name)
        try:  # read data and write it to local cache
            data = json.load(datastream)
            datastream.close()
            col.remove()
            col.insert(data)
            col.insert({'expire': expire_timestamp(expire)})
        except Exception as exc:
            print_exc(exc)
    return data, expire
def get_records(self, col, spec, fields, skeys, idx, limit, unique=False):
    """Generator to get records from MongoDB. It correctly applies
    sorting, skip and limit arguments as well as the unique filter."""
    if fields:
        for key in fields:  # ensure that fields keys will be present
            if key not in self.das_internal_keys and key not in spec:
                spec.update({key: {'$exists': True}})
    try:
        res = col.find(spec=spec, fields=fields)
        if skeys:
            res = res.sort(skeys)
        if not unique:
            if idx:
                res = res.skip(idx)
            if limit:
                res = res.limit(limit)
    except Exception as exp:
        print_exc(exp)
        row = {'exception': str(exp)}
        res = []
        yield row
    if unique:
        if limit:
            gen = itertools.islice(unique_filter(res), idx, idx + limit)
        else:
            gen = unique_filter(res)
        for row in gen:
            yield row
    else:
        for row in res:
            yield row
def dbs_daemon(self, config):
    """Start DBS daemon if it is requested via DAS configuration"""
    try:
        main_dbs_url = self.dasconfig['dbs']['dbs_global_url']
        self.dbs_urls = []
        for inst in self.dbs_instances:
            self.dbs_urls.append(\
                    main_dbs_url.replace(self.dbs_global, inst))
        interval = config.get('dbs_daemon_interval', 3600)
        dbsexpire = config.get('dbs_daemon_expire', 3600)
        self.dbsmgr = {}  # dbs_urls vs dbs_daemons
        if self.dataset_daemon:
            for dbs_url in self.dbs_urls:
                dbsmgr = DBSDaemon(dbs_url, self.dburi, expire=dbsexpire)
                self.dbsmgr[dbs_url] = dbsmgr

                def dbs_updater(_dbsmgr, interval):
                    """DBS updater daemon"""
                    while True:
                        try:
                            _dbsmgr.update()
                        except Exception:
                            pass
                        time.sleep(interval)
                print("Start DBSDaemon for %s" % dbs_url)
                thread.start_new_thread(dbs_updater, (dbsmgr, interval, ))
    except Exception as exc:
        print_exc(exc)
def get_page_content(self, kwargs, complete_msg=True):
    """Retrieve page content for provided set of parameters"""
    html_views = ['list', 'table']
    page = ''
    try:
        view = kwargs.get('view', 'list')
        if view == 'plain':
            if 'limit' in kwargs:
                del kwargs['limit']
        if view in ['json', 'xml', 'plain'] and complete_msg:
            page = 'Request completed. Reload the page ...'
        else:
            head, data = self.get_data(kwargs)
            allowed_views = ['list', 'table', 'plain', 'xml', 'json']
            if view not in allowed_views:
                raise Exception('Unsupported view: %s' % view)
            func = getattr(self, view + "view")
            page = func(head, data)
    except HTTPError as _err:
        raise
    except Exception as exc:
        print_exc(exc)
        msg = gen_error_msg(kwargs)
        page = self.templatepage('das_error', msg=msg)
    return page
def stop(self):
    """
    Stop the daemon
    """
    # Get the pid from the pidfile
    try:
        pidf = open(self.pidfile, 'r')
        pid = int(pidf.read().strip())
        pidf.close()
    except IOError:
        pid = None
    if not pid:
        message = "pidfile %s does not exist. Daemon not running?\n"
        sys.stderr.write(message % self.pidfile)
        return  # not an error in a restart
    # Try killing the daemon process
    try:
        while True:
            os.kill(pid, SIGTERM)
            time.sleep(0.1)
    except OSError as err:
        if str(err).find("No such process") > 0:
            if os.path.exists(self.pidfile):
                os.remove(self.pidfile)
        else:
            print_exc(err)
            sys.exit(1)
def datasets_dbs(self):
    """
    Retrieve a list of DBS datasets (DBS2)
    """
    query = "find dataset,dataset.status"
    params = {"api": "executeQuery", "apiversion": "DBS_2_0_9",
              "query": query}
    encoded_data = urllib.urlencode(params, doseq=True)
    url = self.dbs_url + "?" + encoded_data
    req = urllib2.Request(url)
    try:
        stream = urllib2.urlopen(req)
    except urllib2.HTTPError:
        msg = "Fail to contact %s" % url
        print(dastimestamp("DAS ERROR"), msg)
        raise Exception(msg)
    except Exception as exc:
        print_exc(exc)
        msg = "Fail to contact %s" % url
        print(dastimestamp("DAS ERROR"), msg)
        raise Exception(msg)
    gen = qlxml_parser(stream, "dataset")
    for row in gen:
        dataset = row["dataset"]["dataset"]
        rec = {"dataset": dataset}
        if self.write_hash:
            storage_query = {
                "fields": ["dataset"],
                "spec": [{"key": "dataset.name",
                          "value": '"%s"' % dataset}],
                "instance": self.dbcoll,
            }
            rec.update({"qhash": genkey(storage_query)})
        if row["dataset"]["dataset.status"] == "VALID":
            yield rec
    stream.close()
def getdata_helper(self, url, params, expire, headers=None, post=None):
    "Helper function to get data from SiteDB or local cache"
    cname = url.split('/')[-1].replace('-', '_')
    conn = db_connection(self.dburi)
    col = conn[self.name][cname]
    local = find_one(col, {'expire': {'$gt': expire_timestamp(time.time())}})
    data = None
    if local:
        msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
        self.logger.info(msg)
        try:  # get data from local cache
            data = [r for r in col.find() if 'expire' not in r][0]
            del data['_id']
        except Exception as exc:
            print_exc(exc)
            data = {}
    if not data or not local:
        headers = {'Accept': 'application/json'}
        datastream, expire = getdata(\
                url, params, headers, expire, post,
                self.error_expire, self.verbose, self.ckey, self.cert,
                system=self.name)
        try:  # read data and write it to local cache
            data = json.load(datastream)
            datastream.close()
            col.remove()
            col.insert(data)
            col.insert({'expire': expire_timestamp(expire)})
        except Exception as exc:
            print_exc(exc)
    return data, expire
def fltpage(self, row):
    """Prepare filter snippet for a given query"""
    rowkeys = []
    page = ''
    if row and 'das' in row and 'primary_key' in row['das']:
        pkey = row['das']['primary_key']
        if pkey and isinstance(pkey, (str, unicode)):
            try:
                mkey = pkey.split('.')[0]
                if isinstance(row[mkey], list):
                    # take first five or fewer entries from the list to cover
                    # possible aggregated records and extract row keys
                    lmax = len(row[mkey]) if len(row[mkey]) < 5 else 5
                    sublist = [row[mkey][i] for i in range(0, lmax)]
                    ndict = DotDict({mkey: sublist})
                    rowkeys = [k for k in ndict.get_keys(mkey)]
                else:
                    rowkeys = [k for k in DotDict(row).get_keys(mkey)]
                rowkeys.sort()
                rowkeys += ['das.conflict']
                dflt = das_filters() + das_aggregators()
                dflt.remove('unique')
                page = self.templatepage('das_filters', \
                        filters=dflt, das_keys=rowkeys)
            except Exception as exc:
                msg = "Fail to pkey.split('.') for pkey=%s" % pkey
                print(msg)
                print_exc(exc)
    return page
def __init__(self, name, config):
    self.name = name
    try:
        self.verbose = config['verbose']
        title = 'DASAbstactService_%s' % self.name
        self.logger = PrintManager(title, self.verbose)
        self.dasmapping = config['dasmapping']
        self.analytics = config['dasanalytics']
        self.write2cache = config.get('write_cache', True)
        self.multitask = config['das'].get('multitask', True)
        self.error_expire = config['das'].get('error_expire', 300)
        if 'dbs' in config:
            self.dbs_global = config['dbs'].get('dbs_global_instance', None)
        else:
            self.dbs_global = None
        dburi = config['mongodb']['dburi']
        engine = config.get('engine', None)
        self.gfs = db_gridfs(dburi)
    except Exception as exc:
        print_exc(exc)
        raise Exception('fail to parse DAS config')
    # read key/cert info
    try:
        self.ckey, self.cert = get_key_cert()
    except Exception as exc:
        print_exc(exc)
        self.ckey = None
        self.cert = None
    if self.multitask:
        nworkers = config['das'].get('api_workers', 3)
        thr_weights = config['das'].get('thread_weights', [])
        for system_weight in thr_weights:
            system, weight = system_weight.split(':')
            if system == self.name:
                nworkers *= int(weight)
        if engine:
            thr_name = 'DASAbstractService:%s:PluginTaskManager' % self.name
            self.taskmgr = PluginTaskManager(\
                    engine, nworkers=nworkers, name=thr_name)
            self.taskmgr.subscribe()
        else:
            thr_name = 'DASAbstractService:%s:TaskManager' % self.name
            self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
    else:
        self.taskmgr = None
    self.map = {}         # to be defined by data-service implementation
    self._keys = None     # to be defined at run-time in self.keys
    self._params = None   # to be defined at run-time in self.parameters
    self._notations = {}  # to be defined at run-time in self.notations
    self.logger.info('initialized')
    # define internal cache manager to put 'raw' results into cache
    if 'rawcache' in config and config['rawcache']:
        self.localcache = config['rawcache']
    else:
        msg = 'Undefined rawcache, please check your configuration'
        raise Exception(msg)
def get_records(self, coll, spec, fields, skeys, idx, limit, unique=False): "Generator to get records from MongoDB." try: conn = db_connection(self.dburi) mdb = conn[self.dbname] mdb.add_son_manipulator(self.das_son_manipulator) col = mdb[coll] nres = col.find(spec, exhaust=True).count() if nres == 1 or nres <= limit: limit = 0 if limit: res = col.find(spec=spec, fields=fields, sort=skeys, skip=idx, limit=limit) else: res = col.find(spec=spec, fields=fields, sort=skeys, exhaust=True) if unique: res = unique_filter(res) for row in res: yield row except Exception as exp: print_exc(exp) row = {'exception': str(exp)} res = [] yield row
def get_status(self, dasquery):
    """
    Look-up status of provided query in a cache.
    Return status of the query request and its hash.
    """
    status = None
    error = None
    reason = None
    if dasquery and 'fields' in dasquery.mongo_query:
        fields = dasquery.mongo_query['fields']
        if fields and isinstance(fields, list) and 'queries' in fields:
            return 'ok', error, reason
    record = self.rawcache.find(dasquery)
    error, reason = self.rawcache.is_error_in_records(dasquery)
    try:
        if record and 'das' in record and 'status' in record['das']:
            status = record['das']['status']
            if not error:
                error = record['das'].get('error', error)
            if not reason:
                reason = record['das'].get('reason', reason)
            return status, error, reason
    except Exception as exc:
        print_exc(exc)
        status = error = reason = None
        self.rawcache.remove_from_cache(dasquery)
    return status, error, reason
def pass_apicall(self, dasquery, url, api, api_params):
    """
    Filter provided apicall wrt existing apicall records in Analytics DB.
    """
    self.analytics.remove_expired()
    msg = 'API=%s, args=%s' % (api, api_params)
    for row in self.analytics.list_apicalls(url=url, api=api):
        input_query = {'spec': api_params}
        exist_query = {'spec': row['apicall']['api_params']}
        if compare_specs(input_query, exist_query):
            msg += '\nwill re-use existing api call with args=%s, query=%s'\
                    % (row['apicall']['api_params'], exist_query)
            self.logger.info(msg)
            try:  # update DAS cache with empty result set
                args = self.inspect_params(api, api_params)
                cond = {'das.qhash': row['apicall']['qhash']}
                record = self.localcache.col.find_one(cond)
                if record and 'das' in record and 'expire' in record['das']:
                    expire = record['das']['expire']
                    self.write_to_cache(\
                            dasquery, expire, url, api, args, [], 0)
            except Exception as exc:
                print_exc(exc)
                msg = 'failed api %s\n' % api
                msg += 'input query %s\n' % input_query
                msg += 'existing query %s\n' % exist_query
                msg += 'Unable to look-up existing query and extract '
                msg += 'expire timestamp'
                raise Exception(msg)
            return False
    return True
def get_page_content(self, kwargs, complete_msg=True):
    """Retrieve page content for provided set of parameters"""
    page = ""
    try:
        view = kwargs.get("view", "list")
        if view == "plain":
            if "limit" in kwargs:
                del kwargs["limit"]
        if view in ["json", "xml", "plain"] and complete_msg:
            page = "Request completed. Reload the page ..."
        else:
            head, data = self.get_data(kwargs)
            allowed_views = ["list", "table", "plain", "xml", "json"]
            if view not in allowed_views:
                raise Exception("Unsupported view: %s" % view)
            func = getattr(self, view + "view")
            page = func(head, data)
    except HTTPError as _err:
        raise
    except Exception as exc:
        print_exc(exc)
        msg = gen_error_msg(kwargs)
        page = self.templatepage("das_error", msg=msg)
    return page
def dbs_daemon(self, config):
    """Start DBS daemon if it is requested via DAS configuration"""
    try:
        main_dbs_url = self.dbs_url
        dbs_urls = []
        print("### DBS URL:", self.dbs_url)
        print("### DBS instances:", self.dbs_instances)
        if not self.dbs_url or not self.dbs_instances:
            return  # just quit
        for inst in self.dbs_instances:
            dbs_urls.append(
                (main_dbs_url.replace(self.dbs_global, inst), inst))
        interval = config.get("dbs_daemon_interval", 3600)
        dbsexpire = config.get("dbs_daemon_expire", 3600)
        preserve_dbs_col = config.get("preserve_on_restart", False)
        dbs_config = {"expire": dbsexpire,
                      "preserve_on_restart": preserve_dbs_col}
        if self.dataset_daemon:
            for dbs_url, inst in dbs_urls:
                dbsmgr = DBSDaemon(dbs_url, self.dburi, dbs_config)
                self.dbsmgr[(dbs_url, inst)] = dbsmgr

                def dbs_updater(_dbsmgr, interval):
                    """DBS updater daemon"""
                    while True:
                        try:
                            _dbsmgr.update()
                        except Exception:
                            pass
                        time.sleep(interval)
                print("### Start DBSDaemon for %s" % dbs_url)
                thname = "dbs_updater:%s" % dbs_url
                start_new_thread(thname, dbs_updater, (dbsmgr, interval))
    except Exception as exc:
        print_exc(exc)
def sitedb_parser(source):
    """SiteDB parser"""
    if isinstance(source, str) or isinstance(source, unicode):
        data = json.loads(source)
    # elif hasattr(source, "close") or isinstance(source, file):
    elif hasattr(source, "close"):  # got data descriptor
        try:
            data = json.load(source)
        except Exception as exc:
            print_exc(exc)
            source.close()
            raise
        source.close()
    else:
        data = source
    if not isinstance(data, dict):
        raise Exception('Wrong data type, %s' % type(data))
    if 'desc' in data:
        columns = data['desc']['columns']
        for row in data['result']:
            yield rowdict(columns, row)
    else:
        for row in data['result']:
            yield row
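# A self-contained sketch of sitedb_parser() on an in-memory JSON string
# shaped like a SiteDB response; the column names and values are made up for
# illustration and the rowdict() helper from this module is assumed to be
# available.
sample = ('{"desc": {"columns": ["site_name", "tier"]},'
          ' "result": [["T2_XX_Test", "T2"]]}')
for row in sitedb_parser(sample):
    print(row)  # e.g. {'site_name': 'T2_XX_Test', 'tier': 'T2'}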
def init(self):
    """
    Establish connection to MongoDB back-end and create DB.
    """
    col = None
    try:
        conn = db_connection(self.dburi)
        if conn:
            dbc = conn[self.dbname]
            col = dbc[self.colname]
        # print("### DASMapping:init started successfully")
    except ConnectionFailure as _err:
        tstamp = dastimestamp("")
        thread = threading.current_thread()
        print("### MongoDB connection failure thread=%s, id=%s, time=%s"
              % (thread.name, thread.ident, tstamp))
    except Exception as exc:
        print_exc(exc)
    if col is not None:  # Collection objects do not support truth testing
        index = [
            ("type", DESCENDING),
            ("system", DESCENDING),
            ("urn", DESCENDING),
            ("das_map.das_key", DESCENDING),
            ("das_map.rec_key", DESCENDING),
            ("das_map.api_arg", DESCENDING),
        ]
        create_indexes(col, index)
def makepy(self, dataset, instance):
    """
    Request to create CMSSW py snippet for a given dataset
    """
    pat = re.compile('/.*/.*/.*')
    if not pat.match(dataset):
        msg = 'Invalid dataset name'
        return self.error(msg)
    query = "file dataset=%s instance=%s | grep file.name" \
            % (dataset, instance)
    try:
        data = self.dasmgr.result(query, idx=0, limit=0)
    except Exception as exc:
        print_exc(exc)
        msg = 'Exception: %s\n' % str(exc)
        msg += 'Unable to retrieve data for query=%s' % query
        return self.error(msg)
    lfns = []
    for rec in data:
        filename = DotDict(rec).get('file.name')
        if filename not in lfns:
            lfns.append(filename)
    page = self.templatepage('das_files_py', lfnList=lfns, pfnList=[],
                             isinstance=isinstance, list=list)
    cherrypy.response.headers['Content-Type'] = "text/plain"
    return page
def __init__(self, query, **flags):
    """
    Accepts general form of DAS query, supported formats are
    DAS input query, DAS mongo query, DAS storage query. The
    supplied flags can carry any query attributes, e.g.
    filters, aggregators, system, instance, etc.
    """
    self._mongoparser = None
    self._params = {}
    self._service_apis_map = {}
    self._str = ''
    self._query = ''
    self._storage_query = {}
    self._mongo_query = {}
    self._qhash = None
    self._system = None
    self._instance = None
    self._loose_query = None
    self._pattern_query = None
    self._sortkeys = []
    self._filters = {}
    self._mapreduce = []
    self._aggregators = []
    self._flags = flags
    # loop over flags and set available attributes
    for key, val in flags.iteritems():
        setattr(self, '_%s' % key, val)
    # test data type of input query and apply appropriate initialization
    if isinstance(query, basestring):
        self._query = query
        try:
            self._mongo_query = self.mongoparser.parse(query)
            for key, val in flags.iteritems():
                if key in ['mongoparser']:
                    continue
                if key not in self._mongo_query:
                    self._mongo_query[key] = val
        except Exception as exp:
            msg = "Fail to parse DAS query='%s'" % query
            print_exc(msg, print_traceback=False)
            raise exp
    elif isinstance(query, dict):
        newquery = {}
        for key, val in query.iteritems():
            newquery[key] = val
        if isinstance(newquery.get('spec'), dict):  # mongo query
            self._mongo_query = newquery
        else:  # storage query
            self._storage_query = newquery
    elif isinstance(query, object) and hasattr(query, '__class__')\
            and query.__class__.__name__ == 'DASQuery':
        self._query = query.query
        self._mongo_query = query.mongo_query
        self._storage_query = query.storage_query
    else:
        raise Exception('Unsupported data type of DAS query')
    self.update_attr()
def process_requests_onhold(self):
    "Process requests which are on hold"
    try:
        limit = self.queue_limit/2
        thread.start_new_thread(onhold_worker, \
                (self.dasmgr, self.taskmgr, self.reqmgr, limit))
    except Exception as exc:
        print_exc(exc)
def __init__(self, name, config):
    self.name = name
    try:
        self.verbose = config['verbose']
        title = 'DASAbstactService_%s' % self.name
        self.logger = PrintManager(title, self.verbose)
        self.dasmapping = config['dasmapping']
        self.write2cache = config.get('write_cache', True)
        self.multitask = config['das'].get('multitask', True)
        self.error_expire = config['das'].get('error_expire', 300)
        self.dbs_global = None  # to be configured at run time
        self.dburi = config['mongodb']['dburi']
        engine = config.get('engine', None)
        self.gfs = db_gridfs(self.dburi)
    except Exception as exc:
        print_exc(exc)
        raise Exception('fail to parse DAS config')
    # read key/cert info
    try:
        self.ckey, self.cert = get_key_cert()
    except Exception as exc:
        print_exc(exc)
        self.ckey = None
        self.cert = None
    if self.multitask:
        nworkers = config['das'].get('api_workers', 3)
        thr_weights = config['das'].get('thread_weights', [])
        for system_weight in thr_weights:
            system, weight = system_weight.split(':')
            if system == self.name:
                nworkers *= int(weight)
        # if engine:
        #     thr_name = 'DASAbstractService:%s:PluginTaskManager' % self.name
        #     self.taskmgr = PluginTaskManager(\
        #         engine, nworkers=nworkers, name=thr_name)
        #     self.taskmgr.subscribe()
        # else:
        #     thr_name = 'DASAbstractService:%s:TaskManager' % self.name
        #     self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
        thr_name = 'DASAbstractService:%s:TaskManager' % self.name
        self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
    else:
        self.taskmgr = None
    self.map = {}         # to be defined by data-service implementation
    self._keys = None     # to be defined at run-time in self.keys
    self._params = None   # to be defined at run-time in self.parameters
    self._notations = {}  # to be defined at run-time in self.notations
    self.logger.info('initialized')
    # define internal cache manager to put 'raw' results into cache
    if 'rawcache' in config and config['rawcache']:
        self.localcache = config['rawcache']
    else:
        msg = 'Undefined rawcache, please check your configuration'
        raise Exception(msg)
def records(self, *args, **kwargs):
    """
    Retrieve all record id's.
    """
    try:
        recordid = None
        if args:
            recordid = args[0]
            spec = {'_id': ObjectId(recordid)}
            fields = None
            query = dict(fields=fields, spec=spec)
        elif kwargs and '_id' in kwargs:
            spec = {'_id': ObjectId(kwargs['_id'])}
            fields = None
            query = dict(fields=fields, spec=spec)
        else:  # return all ids
            query = dict(fields=None, spec={})
        res = ''
        time0 = time.time()
        idx = getarg(kwargs, 'idx', 0)
        limit = getarg(kwargs, 'limit', 10)
        coll = kwargs.get('collection', 'merge')
        inst = kwargs.get('instance', self.dbs_global)
        form = self.form(uinput="")
        check, content = self.generate_dasquery(query, inst)
        if check:
            return self.page(form + content, ctime=time.time()-time0)
        dasquery = content  # returned content is valid DAS query
        nresults = self.dasmgr.rawcache.nresults(dasquery, coll)
        gen = self.dasmgr.rawcache.get_from_cache\
                (dasquery, idx=idx, limit=limit, collection=coll)
        if recordid:  # we got id
            for row in gen:
                res += das_json(row)
        else:
            for row in gen:
                rid = row['_id']
                del row['_id']
                res += self.templatepage('das_record', \
                        id=rid, collection=coll, daskeys=', '.join(row))
        if recordid:
            page = res
        else:
            url = '/das/records?'
            if nresults:
                page = self.templatepage('das_pagination', \
                        nrows=nresults, idx=idx, limit=limit, url=url)
            else:
                page = 'No results found, nresults=%s' % nresults
            page += res
        ctime = (time.time()-time0)
        page = self.page(form + page, ctime=ctime)
        return page
    except Exception as exc:
        print_exc(exc)
        return self.error(gen_error_msg(kwargs))
def run(self):
    """Run thread loop."""
    while True:
        func, args, kargs = self.tasks.get()
        try:
            func(*args, **kargs)
        except Exception as exp:
            print_exc(exp)
        self.tasks.task_done()
def generate_dasquery(self, uinput, inst, html_error=True):
    """
    Check provided input as valid DAS input query.
    Returns status and content (either error message or valid DASQuery).
    """
    def helper(msg, html_error=None):
        """Helper function which provides error template"""
        if not html_error:
            return msg
        guide = self.templatepage('dbsql_vs_dasql',
                                  operators=', '.join(das_operators()))
        page = self.templatepage('das_ambiguous', msg=msg,
                                 base=self.base, guide=guide)
        return page
    if not uinput:
        return 1, helper('No input query')
    # Generate DASQuery object, if it fails we catch the exception and
    # wrap it for the upper layer (web interface)
    try:
        dasquery = DASQuery(uinput, instance=inst)
    except Exception as err:
        return 1, helper(das_parser_error(uinput, str(err)), html_error)
    fields = dasquery.mongo_query.get('fields', [])
    if not fields:
        fields = []
    spec = dasquery.mongo_query.get('spec', {})
    for word in fields + spec.keys():
        found = 0
        if word in DAS_DB_KEYWORDS:
            found = 1
        for key in self.daskeys:
            if word.find(key) != -1:
                found = 1
        if not found:
            msg = 'Provided input does not contain a valid DAS key'
            return 1, helper(msg, html_error)
    if isinstance(uinput, dict):  # DASQuery w/ {'spec':{'_id:id}}
        pass
    elif uinput.find('queries') != -1:
        pass
    elif uinput.find('records') != -1:
        pass
    else:  # normal user DAS query
        try:
            service_map = dasquery.service_apis_map()
        except Exception as exc:
            msg = 'Fail to lookup DASQuery service API map'
            print(msg)
            print_exc(exc)
            return 1, helper(msg, html_error)
        if not service_map:
            msg = "None of the API's registered in DAS "
            msg += "can resolve this query"
            return 1, helper(msg, html_error)
    return 0, dasquery
def __init__(self, name, config): self.name = name try: self.verbose = config["verbose"] title = "DASAbstactService_%s" % self.name self.logger = PrintManager(title, self.verbose) self.dasmapping = config["dasmapping"] self.write2cache = config.get("write_cache", True) self.multitask = config["das"].get("multitask", True) self.error_expire = config["das"].get("error_expire", 300) self.dbs_global = None # to be configured at run time self.dburi = config["mongodb"]["dburi"] engine = config.get("engine", None) self.gfs = db_gridfs(self.dburi) except Exception as exc: print_exc(exc) raise Exception("fail to parse DAS config") # read key/cert info try: self.ckey, self.cert = get_key_cert() except Exception as exc: print_exc(exc) self.ckey = None self.cert = None if self.multitask: nworkers = config["das"].get("api_workers", 3) thr_weights = config["das"].get("thread_weights", []) for system_weight in thr_weights: system, weight = system_weight.split(":") if system == self.name: nworkers *= int(weight) if engine: thr_name = "DASAbstractService:%s:PluginTaskManager" % self.name self.taskmgr = PluginTaskManager(engine, nworkers=nworkers, name=thr_name) self.taskmgr.subscribe() else: thr_name = "DASAbstractService:%s:TaskManager" % self.name self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name) else: self.taskmgr = None self.map = {} # to be defined by data-service implementation self._keys = None # to be defined at run-time in self.keys self._params = None # to be defined at run-time in self.parameters self._notations = {} # to be defined at run-time in self.notations self.logger.info("initialized") # define internal cache manager to put 'raw' results into cache if "rawcache" in config and config["rawcache"]: self.localcache = config["rawcache"] else: msg = "Undefined rawcache, please check your configuration" raise Exception(msg)
def add_onhold(self, pid, uinput, addr, future_tstamp):
    """Add user input to onhold collection"""
    tstamp = time.strftime("%Y%m%d %H:%M:%S", time.localtime())
    doc = dict(_id=pid, ip=addr, uinput=uinput, \
               ts=future_tstamp, timestamp=tstamp)
    try:
        self.hold.insert(doc, safe=True)
    except DuplicateKeyError:
        pass
    except Exception as err:
        print_exc(err)
def filter_bar(self, dasquery):
    "Construct filter bar UI element and return it for given input"
    if dasquery.filters:
        # if we have filter/aggregator get one row from the given query
        fltpage = ''
        try:
            if dasquery.mongo_query:
                fltpage = self.fltpage(dasquery)
        except Exception as exc:
            fltpage = 'N/A, please check DAS record for errors'
            msg = 'Fail to apply filter to query=%s' % dasquery.query
            print(msg)
            print_exc(exc)
    else:
        fltpage = ''
    return fltpage
def init(self): """Init DAS web server, connect to DAS Core""" try: self.reqmgr = RequestManager(lifetime=self.lifetime) self.dasmgr = DASCore(engine=self.engine) self.repmgr = CMSRepresentation(self.dasconfig, self.dasmgr) self.daskeys = self.dasmgr.das_keys() self.gfs = db_gridfs(self.dburi) self.daskeys.sort() self.dasmapping = self.dasmgr.mapping self.dbs_url = self.dasmapping.dbs_url() self.dbs_global = self.dasmapping.dbs_global_instance() self.dbs_instances = self.dasmapping.dbs_instances() self.dasmapping.init_presentationcache() self.colors = {'das':gen_color('das')} for system in self.dasmgr.systems: self.colors[system] = gen_color(system) if not self.daskeyslist: keylist = [r for r in self.dasmapping.das_presentation_map()] keylist.sort(key=lambda r: r['das']) self.daskeyslist = keylist except ConnectionFailure as _err: tstamp = dastimestamp('') mythr = threading.current_thread() print("### MongoDB connection failure thread=%s, id=%s, time=%s" \ % (mythr.name, mythr.ident, tstamp)) except Exception as exc: print_exc(exc) self.dasmgr = None self.reqmgr = None self.dbs_url = None self.dbs_global = None self.dbs_instances = [] self.daskeys = [] self.colors = {} self.q_rewriter = None return # KWS and Query Rewriting failures are not fatal try: # init query rewriter, if needed if self.dasconfig['query_rewrite']['pk_rewrite_on']: self.q_rewriter = CMSQueryRewrite(self.repmgr, self.templatepage) except Exception as exc: print_exc(exc) self.q_rewriter = None
def das_populator_helper(dasmgr, query, expire):
    """Process DAS query through DAS Core and set new expire tstamp for it"""
    try:
        # To allow re-use of queries fed by the DAS populator
        # we need to ensure that instance is present in the DAS query,
        # since the web interface does it by default.
        dasquery = dasmgr.adjust_query(query)
        if 'instance' not in dasquery:
            raise Exception('Supplied query does not have DBS instance')
        newts = expire_timestamp(expire)
        # process DAS query
        dasmgr.call(dasquery)
        # update DAS expire timestamp
        dasmgr.rawcache.update_das_expire(dasquery, newts)
        print("\n### DAS populator", query, dasquery, expire, newts)
    except Exception as exc:
        print_exc(exc)
def init(self): """Init DAS web server, connect to DAS Core""" try: self.dasmgr = DASCore(multitask=False) self.dbs_instances = self.dasmgr.mapping.dbs_instances() self.dbs_global = self.dasmgr.mapping.dbs_global_instance() if KeywordSearchHandler: self.kws = KeywordSearchHandler(self.dasmgr) except ConnectionFailure: tstamp = dastimestamp('') mythr = threading.current_thread() print("### MongoDB connection failure thread=%s, id=%s, time=%s" \ % (mythr.name, mythr.ident, tstamp)) except Exception as exc: print_exc(exc) self.dasmgr = None self.kws = None
def query_db(dbname, dbcol, query, idx=0, limit=10):
    """
    Query a given db collection
    """
    conn = db_connection(get_db_uri())
    col = conn[dbname][dbcol]
    if col is not None:  # pymongo collections do not support truth testing
        try:
            if limit == -1:
                for row in col.find(query, **PYMONGO_OPTS):
                    yield row
            else:
                for row in col.find(query).skip(idx).limit(limit):
                    yield row
        except Exception as exc:  # we shall not catch GeneratorExit
            print_exc(exc)
def quote(data):
    """
    Sanitize the data using cgi.escape.
    """
    if isinstance(data, (int, long, float, dict, list)):
        res = data
    elif isinstance(data, ObjectId):
        res = str(data)
    else:
        try:
            if data:
                res = cgi.escape(data, quote=True)
            else:
                res = ""
        except Exception as exc:
            print_exc(exc)
            print("Unable to cgi.escape(%s, quote=True)" % data)
            res = ""
    return res
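# A minimal usage sketch for quote(), assuming the Python 2 era modules this
# code relies on (cgi, bson); the sample values are illustrative only.
from bson.objectid import ObjectId

print(quote('<script>alert("x")</script>'))  # HTML-escaped string
print(quote(42))                             # numbers pass through untouched
print(quote(ObjectId('0123456789ab0123456789ab')))  # ObjectId -> str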
def delete(self, system=None):
    """
    Delete expired documents in das.cache.
    """
    spec = {'das.expire': {'$lt': time.time()}}
    if system:
        spec['das.system'] = system
    msg = "Found %s expired documents" % self.cache.find(spec).count()
    try:
        if pymongo.version.startswith('3.'):  # pymongo 3.X
            self.cache.delete_many(spec)
        else:
            self.cache.remove(spec)
        msg += ", delete operation [OK]"
        print(msg)
    except Exception as exc:
        msg += ", delete operation [FAIL]"
        print(msg)
        print_exc(exc)
def gridfs(self, **kwargs):
    """
    Retrieve records from GridFS
    """
    time0 = time.time()
    if 'fid' not in kwargs:
        code = web_code('No file id')
        raise HTTPError(500, 'DAS error, code=%s' % code)
    fid = kwargs.get('fid')
    data = {'status': 'requested', 'fid': fid}
    try:
        fds = self.gfs.get(ObjectId(fid))
        return fds.read()
    except Exception as exc:
        print_exc(exc)
        code = web_code('Exception')
        raise HTTPError(500, 'DAS error, code=%s' % code)
    data['ctime'] = time.time() - time0
    return json.dumps(data)
def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire = 600
    dataset = kwds.get('dataset', None)
    block = kwds.get('block_name', None)
    if not block:
        # TODO: this should go away when DBS will be retired
        # (used in combined srv)
        block = kwds.get('block', None)
    lfn = kwds.get('file', None)
    runs = kwds.get('runs', [])
    if not (dataset or block or lfn):
        return
    url = '%s/%ss' % (url, entity)  # DBS3 APIs use plural entity value
    if dataset:
        params = {'dataset': dataset}
    elif block:
        params = {'block_name': block}
    elif lfn:
        params = {'logical_file_name': lfn}
    if runs:
        params.update({'run_num': runs})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if isinstance(rec, basestring):
                    print(dastimestamp('DBS3 ERROR:'), row)
                elif entity == 'file':
                    yield rec['logical_file_name']
                elif entity == 'block':
                    yield rec['block_name']
                elif entity == 'dataset':
                    # NOTE: the original had a duplicated 'file' branch here;
                    # this branch stays unreachable unless 'dataset' is added
                    # to the supported entities above
                    yield rec['dataset']
            except Exception as exp:
                msg = 'Fail to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)
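# A hypothetical invocation sketch for dbs_find(); the DBS instance URL and
# dataset name below are illustrative, and an actual call requires valid grid
# credentials (CKEY/CERT) plus network access to DBS.
url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
kwds = {'dataset': '/ZMM/Summer11-DESIGN42_V11_428_SLHC1-v1/GEN-SIM'}
for lfn in dbs_find('file', url, kwds):
    print(lfn)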
def check_pid(self, pid):
    """
    Check status of given pid. This is a server callback
    function for ajaxCheckPid, see js/ajax_utils.js
    """
    # do not allow caching
    set_no_cache_flags()
    img = '<img src="%s/images/loading.gif" alt="loading"/>' % self.base
    page = ''
    try:
        if self.taskmgr.is_alive(pid):
            page = img + " processing PID=%s" % pid
        else:
            # at this point we don't know if the request arrived to this host
            # or it was processed. To distinguish the case we'll ask the
            # request manager for that pid
            if self.reqmgr.has_pid(pid):
                self.reqmgr.remove(pid)
                self.taskmgr.remove(pid)
                page = 'Request PID=%s is completed' % pid
                page += ', please wait for results to load'
            else:
                # there are no requests on this server, re-initiate it
                ref = cherrypy.request.headers.get('Referer', None)
                if ref:
                    url = urlparse(ref)
                    params = dict(parse_qsl(url.query))
                    return self.request(**params)
                else:
                    msg = 'No referer in cherrypy.request.headers'
                    msg += '\nHeaders: %s' % cherrypy.request.headers
                    dasprint(dastimestamp('DAS WEB ERROR '), msg)
    except Exception as err:
        msg = 'check_pid fails for pid=%s' % pid
        dasprint(dastimestamp('DAS WEB ERROR '), msg)
        print_exc(err)
        self.reqmgr.remove(pid)
        self.taskmgr.remove(pid)
        return self.error(gen_error_msg({'pid': pid}), wrap=False)
    return page
def apicall(self, dasquery, url, api, args, dformat, expire): """ A service worker. It parses input query, invoke service API and return results in a list with provided row. """ # NOTE: I use helper function since it is 2 step process # therefore the expire time stamp will not be changed, since # helper function will yield results time0 = time.time() if api == 'dataset4site_release' or \ api == 'site4dataset' or 'files4dataset_runs_site': genrows = self.helper(api, args, expire) # here I use directly the call to the service which returns # proper expire timestamp. Moreover I use HTTP header to look # at expires and adjust my expire parameter accordingly # NOTE: disable dataset4site, lumi4site since they take too much load # see combined.yml # if api == 'dataset4site': # headers = {'Accept': 'application/json;text/json'} # datastream, expire = \ # getdata(url, args, headers, expire, system='combined') # genrows = parse_data(datastream) # if api == 'lumi4dataset': # headers = {'Accept': 'application/json;text/json'} # data, expire = \ # getdata(url, args, headers, expire, system='combined') # genrows = json_parser(data, None) # proceed with standard workflow ctime = time.time() - time0 try: if isinstance(url, dict): url = "combined: %s" % url.values() self.write_to_cache(dasquery, expire, url, api, \ args, genrows, ctime) except Exception as exc: print_exc(exc)
def get_new_connection(self, uri):
    "Get new MongoDB connection"
    key = self.genkey(uri)
    for idx in range(0, self.retry):
        try:
            dbinst = MongoClient(host=uri, **self.mongo_opts)
            # dbinst = MongoConnection(uri, **self.mongo_opts).client()
            gfs = dbinst.gridfs
            fsinst = gridfs.GridFS(gfs)
            self.conndict[key] = (dbinst, fsinst)
            self.timedict[key] = time.time()
            return (dbinst, fsinst)
        except (ConnectionFailure, AutoReconnect) as exc:
            tstamp = dastimestamp('')
            thread = threading.current_thread()
            print("### MongoDB connection failure thread=%s, id=%s, time=%s" \
                    % (thread.name, thread.ident, tstamp))
            print_exc(exc)
        except Exception as exc:
            print_exc(exc)
        time.sleep(idx)
def get_records(self, coll, spec, fields, skeys, idx, limit, unique=False): "Generator to get records from MongoDB." try: conn = db_connection(self.dburi) mdb = conn[self.dbname] mdb.add_son_manipulator(self.das_son_manipulator) col = mdb[coll] nres = col.find(spec, **PYMONGO_OPTS).count() if nres == 1 or nres <= limit: limit = 0 if limit: res = col.find(spec, fields, sort=skeys, skip=idx, limit=limit) else: res = col.find(spec, fields, sort=skeys, **PYMONGO_OPTS) if unique: res = unique_filter(res) for row in res: yield row except Exception as exp: print_exc(exp) row = {'exception': str(exp)} res = [] yield row
def run(self):
    """Run thread loop."""
    while True:
        if self.exit:
            return
        if isinstance(self._tasks, PriorityQueue):
            _, uid, task = self._tasks.get()
        else:
            task = self._tasks.get()
        if task is None:
            return
        evt, pid, func, args, kwargs = task
        try:
            if isinstance(self._tasks, PriorityQueue):
                self._uids.discard(uid)
            func(*args, **kwargs)
            self._pids.discard(pid)
        except Exception as err:
            self._pids.discard(pid)
            print_exc(err)
            print("\n### args", func, args, kwargs)
        self._tasks.task_done()
        evt.set()
def get_result_fieldlist(self, row):
    rowkeys = []
    if row and 'das' in row and 'primary_key' in row['das']:
        pkey = row['das']['primary_key']
        if pkey and (isinstance(pkey, str) or isinstance(pkey, unicode)):
            try:
                mkey = pkey.split('.')[0]
                if mkey not in row:
                    return []
                if isinstance(row[mkey], list):
                    # take first ten or fewer entries from the list to cover
                    # possible aggregated records and extract row keys
                    ndict = DotDict({mkey: row[mkey][:10]})
                    rowkeys = list(ndict.get_keys(mkey))
                else:
                    rowkeys = list(DotDict(row).get_keys(mkey))
                rowkeys.sort()
                rowkeys += ['das.conflict']
            except Exception as exc:
                # TODO: pkey.split fails only if called on a non-string
                msg = "Fail to pkey.split('.') for pkey=%s" % pkey
                print(msg)
                print_exc(exc)
    return rowkeys
def das_json(dasquery, record, pad='', full=False):
    """
    Wrap provided jsonhtml code snippet into div/pre blocks. Provided
    jsonhtml snippet is sanitized by json2html function.
    """
    error = None
    if full:
        return das_json_full(record, pad)
    mquery = dasquery.mongo_query
    daskeys = ['das_id', 'cache_id', 'qhash', 'das', '_id']
    fields = mquery.get('fields', None)
    if fields:
        lkeys = [l for l in fields if l not in daskeys]
    else:
        lkeys = []
    # get das.systems and primary key
    das = record['das']
    if 'error' in record:
        error = {'error': record.get('error'),
                 'reason': record.get('reason', '')}
    srvs = das.get('system', [])
    apis = das.get('api', [])
    prim_key = das.get('primary_key', '').split('.')[0]
    if not srvs or not prim_key or len(apis) != len(srvs):
        return das_json_full(record, pad)
    try:
        pval = record[prim_key]
    except Exception as exc:
        return das_json_full(record, pad)
    if isinstance(pval, list) and len(pval) != len(srvs):
        return das_json_full(record, pad)
    if not isinstance(pval, list):
        return das_json_full(record, pad)
    try:
        page = '<div class="code">'
        for idx in range(0, len(srvs)):
            srv = srvs[idx]
            api = apis[idx]
            if lkeys:
                rec = {prim_key: pval[idx]}
                for lkey in [l for l in lkeys if l != prim_key]:
                    if lkey != 'error' and lkey != 'reason':
                        rec[lkey] = record[lkey][idx]
                val = das_json_full(rec)
            else:
                val = das_json_full(pval[idx])
            style = 'background-color:%s;color:%s;' % gen_color(srv)
            page += '\n<b>DAS service:</b> '
            page += '<span style="%s;padding:3px">%s</span> ' % (style, srv)
            if srv == 'combined':
                page += das_json_services(srv, das)
            page += '<b>DAS api:</b> %s' % api
            page += '\n<pre style="%s">%s</pre>' % (style, val)
        page += '\n<b>DAS part:</b><pre>%s</pre>' % das_json_full(das)
        if error:
            page += '\n<b>Errors:</b><pre>%s</pre>' % das_json_full(error)
        rhash = {'qhash': record.get('qhash', None),
                 'das_id': record.get('das_id', None),
                 'cache_id': record.get('cache_id', None)}
        page += '<b>Hashes</b>: <pre>%s</pre>' % das_json_full(rhash)
        rlink = '/das/records/%s?collection=merge&view=json' % record['_id']
        page += '<br/>Download <a href="%s">raw record</a>' % rlink
        page += '</div>'
    except Exception as exc:
        print_exc(exc)
        return das_json_full(record, pad)
    return page
def apicall(self, dasquery, url, api, args, dformat, expire): """ A service worker. It parses input query, invoke service API and return results in a list with provided row. """ cond = dasquery.mongo_query['spec'] count = 0 for key, value in cond.items(): err = 'JobSummary does not support key=%s, value=%s' \ % (key, value) if not isinstance(value, dict): # we got equal condition if key == 'date': if isinstance(value, list) and len(value) != 2: msg = 'Dashboard service requires 2 time stamps.' msg += 'Please use either date last XXh format or' msg += 'date in [YYYYMMDD, YYYYMMDD]' raise Exception(msg) if isinstance(value, str) or isinstance(value, unicode): value = convert2date(value) else: value = [value, value + 24 * 60 * 60] args['date1'] = convert_datetime(value[0]) args['date2'] = convert_datetime(value[1]) count += 1 else: for param in self.dasmapping.das2api(self.name, api, key): args[param] = value count += 1 else: # we got some operator, e.g. key :{'$in' : [1,2,3]} if key == 'date' or key == 'jobsummary': if '$in' in value: vallist = value['$in'] elif '$lte' in value and '$gte' in value: vallist = (value['$gte'], value['$lte']) else: raise Exception(err) args['date1'] = convert_datetime(vallist[0]) args['date2'] = convert_datetime(vallist[-1]) count += 1 else: raise Exception(err) if not count: # if no parameter are given, don't pass the API msg = 'DashboardService::api\n\n' msg += "--- %s reject API %s, parameters don't match, args=%s" \ % (self.name, api, args) self.logger.info(msg) return else: if not args['date1']: args['date1'] = convert_datetime(time.time() - 24 * 60 * 60) if not args['date2']: args['date2'] = convert_datetime(time.time()) # drop date argument, since it's used by DAS not by dashboard data srv if 'date' in args: args.pop('date') time0 = time.time() res, expire = self.getdata(url, args, expire, headers=self.headers) rawrows = self.parser(res, api, args) dasrows = self.translator(api, rawrows) ctime = time.time() - time0 try: self.write_to_cache(\ dasquery, expire, url, api, args, dasrows, ctime) except Exception as exc: print_exc(exc)
def helper(self, api, args, expire):
    """
    Class helper function which yields results for given set of input
    parameters. It yields the data record which must contain a combined
    attribute corresponding to systems used to produce record content.
    """
    dbs_url = self.map[api]['services'][self.dbs]
    phedex_url = self.map[api]['services']['phedex']
    # make phedex_api from url, but use xml version for processing
    phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
    if api == 'dataset4site_release' or \
            api == 'dataset4site_release_parent' or \
            api == 'child4site_release_dataset':
        # DBS part
        datasets = set()
        release = args['release']
        parent = args.get('parent', None)
        for row in dbs_dataset4release_parent(dbs_url, release, parent):
            datasets.add(row)
        # Phedex part
        if args['site'].find('.') != -1:  # it is SE
            phedex_args = {'dataset': list(datasets),
                           'se': '%s' % args['site']}
        else:
            phedex_args = {'dataset': list(datasets),
                           'node': '%s*' % args['site']}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(phedex_api, phedex_args, headers, expire,
                    system='phedex')
        prim_key = 'block'
        tags = 'block.replica.node'
        found = {}
        for rec in xml_parser(source, prim_key, tags):
            ddict = DotDict(rec)
            block = ddict.get('block.name')
            bbytes = ddict.get('block.bytes')
            files = ddict.get('block.files')
            found_dataset = block.split('#')[0]
            if found_dataset in found:
                val = found[found_dataset]
                found[found_dataset] = {'bytes': val['bytes'] + bbytes,
                                        'files': val['files'] + files}
            else:
                found[found_dataset] = {'bytes': bbytes, 'files': files}
        for name, val in found.items():
            record = dict(name=name, size=val['bytes'], files=val['files'])
            if api == 'child4site_release_dataset':
                yield {'child': record}
            else:
                yield {'dataset': record}
        del datasets
        del found
    if api == 'site4dataset':
        try:
            gen = site4dataset(dbs_url, phedex_api, args, expire)
            for row in gen:
                sname = row.get('site', {}).get('name', '')
                skind = self.site_info(phedex_url, sname)
                row['site'].update({'kind': skind})
                yield row
        except Exception as err:
            print_exc(err)
            tstamp = dastimestamp('')
            msg = tstamp + ' Exception while processing DBS/Phedex info:'
            msg += str(err)
            row = {'site': {'name': 'Fail to look-up site info',
                            'error': msg,
                            'dataset_fraction': 'N/A',
                            'block_fraction': 'N/A',
                            'block_completion': 'N/A'},
                   'error': msg}
            yield row
    if api == 'files4dataset_runs_site' or \
            api == 'files4block_runs_site':
        run_value = args.get('run', [])
        if isinstance(run_value, dict) and '$in' in run_value:
            runs = run_value['$in']
        elif isinstance(run_value, list):
            runs = run_value
        else:
            if int_number_pattern.match(str(run_value)):
                runs = [run_value]
            else:
                runs = []
        args.update({'runs': runs})
        files = dbs_find('file', dbs_url, args)
        site = args.get('site')
        phedex_api = phedex_url.replace('/json/', '/xml/') + '/fileReplicas'
        for fname in files4site(phedex_api, files, site):
            yield {'file': {'name': fname}}
def __init__(self, query, **flags): """ Accepts general form of DAS query, supported formats are DAS input query, DAS mongo query, DAS storage query. The supplied flags can carry any query attributes, e.g. filters, aggregators, system, instance, etc. """ check_query(query) self._mongoparser = None self._params = {} self._service_apis_map = {} self._str = '' self._query = '' self._query_pat = '' self._query_full = '' self._storage_query = {} self._mongo_query = {} self._qhash = None self._hashes = None self._system = None self._instance = None self._loose_query = None self._pattern_query = None self._sortkeys = [] self._filters = {} self._mapreduce = [] self._aggregators = [] self._qcache = 0 self._flags = flags self._error = '' # loop over flags and set available attributes for key, val in flags.items(): setattr(self, '_%s' % key, val) # test data type of input query and apply appropriate initialization if isinstance(query, basestring): self._query = query try: self._mongo_query = self.mongoparser.parse(query) for key, val in flags.items(): if key in self.NON_CACHEABLE_FLAGS: continue if key not in self._mongo_query: self._mongo_query[key] = val except Exception as exp: msg = "Fail to parse DAS query='%s', %s" % (query, str(exp)) print_exc(msg, print_traceback=True) self._mongo_query = {'error': msg, 'spec': {}, 'fields': []} self._storage_query = {'error': msg} self._error = msg # raise exp elif isinstance(query, dict): newquery = {} for key, val in query.items(): newquery[key] = val if isinstance(newquery.get('spec'), dict): # mongo query self._mongo_query = newquery else: # storage query self._storage_query = newquery elif isinstance(query, object) and hasattr(query, '__class__')\ and query.__class__.__name__ == 'DASQuery': self._query = query.query self._query_pat = query.query_pat self._hashes = query.hashes self._mongo_query = query.mongo_query self._storage_query = query.storage_query else: # raise Exception('Unsupported data type of DAS query') self._error = 'Unsupported data type of DAS query' if self._error: return self.update_attr() # check dataset wild-cards for key, val in self._mongo_query['spec'].items(): if key == 'dataset.name': if isinstance(val, dict): # we get {'$in':[a,b]} continue # only match dataset.name but do not primary_dataset.name if not RE_3SLASHES.match(val): # TODO: we currently do not support wildcard matching # from command line interface if not self._instance: continue # apply 3 slash pattern look-up, continuing only if one # interpretation existings here, ticket #3071 self._handle_dataset_slashes(key, val)
def __init__(self, config=None, debug=0, nores=False, logger=None,
             engine=None, multitask=True):
    if config:
        dasconfig = config
    else:
        dasconfig = das_readconfig()
    verbose = dasconfig['verbose']
    self.stdout = debug
    if isinstance(debug, int) and debug:
        self.verbose = debug
        dasconfig['verbose'] = debug
    else:
        self.verbose = verbose
    das_timer('DASCore::init', self.verbose)
    self.operators = das_operators()
    self.collect_wait_time = dasconfig['das'].get('collect_wait_time', 120)
    # set noresults option
    self.noresults = False
    if nores:
        dasconfig['write_cache'] = True
        self.noresults = nores
    self.init_expire = dasconfig['das'].get('init_expire', 5 * 60)
    self.multitask = dasconfig['das'].get('multitask', True)
    if debug or self.verbose:
        self.multitask = False  # in verbose mode do not use multitask
        dasconfig['das']['multitask'] = False
    if not multitask:  # explicitly requested via DASCore ctor
        self.multitask = False
        dasconfig['das']['multitask'] = False
    dasconfig['engine'] = engine
    if self.multitask:
        nworkers = dasconfig['das'].get('core_workers', 5)
        # if engine:
        #     thr_name = 'DASCore:PluginTaskManager'
        #     self.taskmgr = PluginTaskManager(\
        #         engine, nworkers=nworkers, name=thr_name)
        #     self.taskmgr.subscribe()
        # else:
        #     thr_name = 'DASCore:TaskManager'
        #     self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
        thr_name = 'DASCore:TaskManager'
        self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
    else:
        self.taskmgr = None
    if logger:
        self.logger = logger
    else:
        self.logger = PrintManager('DASCore', self.verbose)
    # define Mapping/Analytics/Parser in this order since Parser depends
    # on first two
    dasmapping = DASMapping(dasconfig)
    dasconfig['dasmapping'] = dasmapping
    self.mapping = dasmapping
    self.keylearning = DASKeyLearning(dasconfig)
    dasconfig['keylearning'] = self.keylearning
    # init DAS cache
    self.rawcache = DASMongocache(dasconfig)
    dasconfig['rawcache'] = self.rawcache
    # plug-in architecture: loop over registered data-services in
    # dasconfig; load appropriate module/class; register data
    # service with DASCore.
    self.systems = dasmapping.list_systems()
    # pointer to the DAS top level directory
    dasroot = '/'.join(__file__.split('/')[:-3])
    for name in self.systems:
        try:
            klass = 'DAS/services/%s/%s_service.py' \
                    % (name, name)
            srvfile = os.path.join(dasroot, klass)
            with open(srvfile) as srvclass:
                for line in srvclass:
                    if line.find('(DASAbstractService)') != -1:
                        klass = line.split('(DASAbstractService)')[0]
                        klass = klass.split('class ')[-1]
                        break
            mname = 'DAS.services.%s.%s_service' % (name, name)
            module = __import__(mname, fromlist=[klass])
            obj = getattr(module, klass)(dasconfig)
            setattr(self, name, obj)
        except IOError as err:
            if debug > 1:
                # we have virtual services, so IOError can be correct
                print_exc(err)
            try:
                mname = 'DAS.services.generic_service'
                module = __import__(mname, fromlist=['GenericService'])
                obj = module.GenericService(name, dasconfig)
                setattr(self, name, obj)
            except Exception as exc:
                print_exc(exc)
                msg = "Unable to load %s data-service plugin" % name
                raise Exception(msg)
        except Exception as exc:
            print_exc(exc)
            msg = "Unable to load %s data-service plugin" % name
            raise Exception(msg)
    # loop over systems and get system keys, add mapping keys to final list
    self.service_keys = {}
    self.service_parameters = {}
    for name in self.systems:
        skeys = list(getattr(self, name).keys())
        self.service_keys[getattr(self, name).name] = skeys
        sparams = getattr(self, name).parameters()
        self.service_parameters[getattr(self, name).name] = sparams
    self.service_keys['special'] = das_special_keys()
    self.dasconfig = dasconfig
    das_timer('DASCore::init', self.verbose)