def init(self):
    """
    Init DAS web server, connect to DAS Core.

    Builds the request manager, DAS core, representation layer and the
    DBS bookkeeping attributes (url, global instance, instance list).
    A MongoDB ``ConnectionFailure`` is only reported, leaving attributes
    untouched (presumably a caller retries init later — TODO confirm);
    any other exception resets every attribute to a safe default and
    aborts.  Query-rewriter setup failures are deliberately non-fatal.
    """
    try:
        self.reqmgr = RequestManager(lifetime=self.lifetime)
        self.dasmgr = DASCore(engine=self.engine)
        self.repmgr = CMSRepresentation(self.dasconfig, self.dasmgr)
        self.daskeys = self.dasmgr.das_keys()
        self.gfs = db_gridfs(self.dburi)
        self.daskeys.sort()
        self.dasmapping = self.dasmgr.mapping
        self.dbs_url = self.dasmapping.dbs_url()
        self.dbs_global = self.dasmapping.dbs_global_instance()
        self.dbs_instances = self.dasmapping.dbs_instances()
        self.dasmapping.init_presentationcache()
        # color map used by the web UI: one entry per data-service plus 'das'
        self.colors = {'das':gen_color('das')}
        for system in self.dasmgr.systems:
            self.colors[system] = gen_color(system)
        # cache the presentation key list once, sorted by DAS key name
        if not self.daskeyslist:
            keylist = [r for r in self.dasmapping.das_presentation_map()]
            keylist.sort(key=lambda r: r['das'])
            self.daskeyslist = keylist
    except ConnectionFailure as _err:
        # MongoDB unreachable: report with thread info, keep attributes as-is
        tstamp = dastimestamp('')
        mythr = threading.current_thread()
        print("### MongoDB connection failure thread=%s, id=%s, time=%s" \
            % (mythr.name, mythr.ident, tstamp))
    except Exception as exc:
        # any other failure: log, reset everything to safe defaults, bail out
        print_exc(exc)
        self.dasmgr = None
        self.reqmgr = None
        self.dbs_url = None
        self.dbs_global = None
        self.dbs_instances = []
        self.daskeys = []
        self.colors = {}
        self.q_rewriter = None
        return
    # KWS and Query Rewriting failures are not fatal
    try:
        # init query rewriter, if needed
        if self.dasconfig['query_rewrite']['pk_rewrite_on']:
            self.q_rewriter = CMSQueryRewrite(self.repmgr, self.templatepage)
    except Exception as exc:
        print_exc(exc)
        self.q_rewriter = None
def init(self):
    """
    Init DAS web server, connect to DAS Core.

    Builds the log DB, request manager, DAS core, representation layer
    and SiteDB service handle.  On any failure the core attributes are
    logged and reset to safe defaults and the method returns early;
    otherwise the on-hold request daemon is started when enabled in the
    'web_server' section of the configuration.
    """
    try:
        self.logcol = DASLogdb(self.dasconfig)
        self.reqmgr = RequestManager(self.dburi, lifetime=self.lifetime)
        self.dasmgr = DASCore(engine=self.engine)
        self.repmgr = CMSRepresentation(self.dasconfig, self.dasmgr)
        self.daskeys = self.dasmgr.das_keys()
        self.gfs = db_gridfs(self.dburi)
        self.daskeys.sort()
        self.dasmapping = self.dasmgr.mapping
        self.dasmapping.init_presentationcache()
        # color map used by the web UI: one entry per data-service
        self.colors = {}
        for system in self.dasmgr.systems:
            self.colors[system] = gen_color(system)
        self.sitedbmgr = SiteDBService(self.dasconfig)
    except Exception as exc:
        # initialization failed: log and fall back to safe defaults
        print_exc(exc)
        self.dasmgr = None
        self.daskeys = []
        self.colors = {}
        return
    # Start Onhold_request daemon
    if self.dasconfig['web_server'].get('onhold_daemon', False):
        self.process_requests_onhold()
def setUp(self):
    """Create a fresh RequestManager bound to the configured MongoDB URI."""
    self.debug = 0
    config = deepcopy(das_readconfig())
    mongo_section = config['mongodb']
    self.dburi = mongo_section['dburi']
    self.reqmgr = RequestManager(self.dburi)
def init(self):
    """
    Init DAS web server, connect to DAS Core.

    Builds the request manager, DAS core, representation layer, DBS
    bookkeeping attributes, the SiteDB handle (taken from the global
    SERVICES registry) and, when enabled, the DBS daemon.  A MongoDB
    ``ConnectionFailure`` is only reported; any other exception resets
    every attribute to a safe default and aborts.  Query-rewriter setup
    failures are deliberately non-fatal.
    """
    try:
        self.reqmgr = RequestManager(lifetime=self.lifetime)
        self.dasmgr = DASCore(engine=self.engine)
        self.repmgr = CMSRepresentation(self.dasconfig, self.dasmgr)
        self.daskeys = self.dasmgr.das_keys()
        self.gfs = db_gridfs(self.dburi)
        self.daskeys.sort()
        self.dasmapping = self.dasmgr.mapping
        self.dbs_url = self.dasmapping.dbs_url()
        self.dbs_global = self.dasmapping.dbs_global_instance()
        self.dbs_instances = self.dasmapping.dbs_instances()
        self.dasmapping.init_presentationcache()
        # color map used by the web UI: one entry per data-service plus 'das'
        self.colors = {"das": gen_color("das")}
        for system in self.dasmgr.systems:
            self.colors[system] = gen_color(system)
        # get SiteDB from global scope
        self.sitedbmgr = SERVICES.get("sitedb2", None)
        # Start DBS daemon
        if self.dataset_daemon:
            self.dbs_daemon(self.dasconfig["web_server"])
        # cache the presentation key list once, sorted by DAS key name
        if not self.daskeyslist:
            keylist = [r for r in self.dasmapping.das_presentation_map()]
            keylist.sort(key=lambda r: r["das"])
            self.daskeyslist = keylist
    except ConnectionFailure as _err:
        # MongoDB unreachable: report with thread info, keep attributes as-is.
        # FIX: was a Python-2-only ``print`` statement, which is a
        # SyntaxError under Python 3 and inconsistent with the print()
        # calls used elsewhere in this file.
        tstamp = dastimestamp("")
        mythr = threading.current_thread()
        print("### MongoDB connection failure thread=%s, id=%s, time=%s"
              % (mythr.name, mythr.ident, tstamp))
    except Exception as exc:
        # any other failure: log, reset everything to safe defaults, bail out
        print_exc(exc)
        self.dasmgr = None
        self.reqmgr = None
        self.dbs_url = None
        self.dbs_global = None
        self.dbs_instances = []
        self.daskeys = []
        self.colors = {}
        self.q_rewriter = None
        return
    # KWS and Query Rewriting failures are not fatal
    try:
        # init query rewriter, if needed
        if self.dasconfig["query_rewrite"]["pk_rewrite_on"]:
            self.q_rewriter = CMSQueryRewrite(self.repmgr, self.templatepage)
    except Exception as exc:
        print_exc(exc)
        self.q_rewriter = None
class testDAS_RegMgr(unittest.TestCase):
    """A test class for the DAS RequestManager"""

    def setUp(self):
        """Prepare a RequestManager backed by the configured MongoDB URI."""
        self.debug = 0
        cfg = deepcopy(das_readconfig())
        self.dburi = cfg['mongodb']['dburi']
        self.reqmgr = RequestManager(self.dburi)

    def test_reqmgr(self):
        """Test reqmgr methods"""
        # add a request, read it back, remove it, verify it is gone
        req_id = 1
        payload = {'uinput': 'bla'}
        self.reqmgr.add(req_id, payload)
        self.assertEqual(payload, self.reqmgr.get(req_id))
        self.reqmgr.remove(req_id)
        self.assertEqual(None, self.reqmgr.get(req_id))
class DASWebService(DASWebManager): """ DAS web service interface. """ def __init__(self, dasconfig): DASWebManager.__init__(self, dasconfig) config = dasconfig['web_server'] self.pid_pat = re.compile(r'^[a-z0-9]{32}') # TODO: self.base shall be automatically included in all tmpls self.base = config['url_base'] self.interval = config.get('status_update', 2500) self.engine = config.get('engine', None) self.check_clients = config.get('check_clients', False) nworkers = config['web_workers'] self.hot_thr = config.get('hot_threshold', 3000) self.dasconfig = dasconfig self.dburi = self.dasconfig['mongodb']['dburi'] self.lifetime = self.dasconfig['mongodb']['lifetime'] self.queue_limit = config.get('queue_limit', 50) qtype = config.get('qtype', 'Queue') qfreq = config.get('qfreq', 5) if qtype not in ['Queue', 'PriorityQueue']: msg = 'Wrong queue type, qtype=%s' % qtype raise Exception(msg) # if self.engine: # thr_name = 'DASWebService:PluginTaskManager' # self.taskmgr = PluginTaskManager(bus=self.engine, \ # nworkers=nworkers, name=thr_name, qtype=qtype, \ # qfreq=qfreq) # self.taskmgr.subscribe() # else: # thr_name = 'DASWebService:TaskManager' # self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name, \ # qtype=qtype, qfreq=qfreq) thr_name = 'DASWebService:TaskManager' self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name, \ qtype=qtype, qfreq=qfreq) self.adjust = config.get('adjust_input', False) self.dasmgr = None # defined at run-time via self.init() self.reqmgr = None # defined at run-time via self.init() self.daskeys = [] # defined at run-time via self.init() self.colors = {} # defined at run-time via self.init() self.dbs_url = None # defined at run-time via self.init() self.dbs_global = None # defined at run-time via self.init() self.dbs_instances = [] # defined at run-time via self.init() self.kws = None # defined at run-time via self.init() self.q_rewriter = None # defined at run-time via self.init() self.dataset_daemon = None self.dbsmgr = {} # 
dbs_urls vs dbs_daemons, defined at run-time self.daskeyslist = [] # list of DAS keys self.init() self.dbs_init(config) # Monitoring thread which performs auto-reconnection thname = 'dascore_monitor' start_new_thread(thname, dascore_monitor, \ ({'das':self.dasmgr, 'uri':self.dburi}, self.init, 5)) def dbs_init(self, config): """Initialize DBS daemons""" main_dbs_url = self.dbs_url dbs_urls = [] print("### DBS URL:", self.dbs_url) print("### DBS global instance:", self.dbs_global) print("### DBS instances:", self.dbs_instances) for inst in self.dbs_instances: dbs_urls.append(\ (main_dbs_url.replace(self.dbs_global, inst), inst)) interval = config.get('dbs_daemon_interval', 3600) dbsexpire = config.get('dbs_daemon_expire', 3600) preserve_dbs_col = config.get('preserve_on_restart', False) dbs_config = {'expire': dbsexpire, 'preserve_on_restart': preserve_dbs_col} for dbs_url, inst in dbs_urls: dbsmgr = DBSDaemon(dbs_url, self.dburi, dbs_config) self.dbsmgr[(dbs_url, inst)] = dbsmgr def init(self): """Init DAS web server, connect to DAS Core""" try: self.reqmgr = RequestManager(lifetime=self.lifetime) self.dasmgr = DASCore(engine=self.engine) self.repmgr = CMSRepresentation(self.dasconfig, self.dasmgr) self.daskeys = self.dasmgr.das_keys() self.gfs = db_gridfs(self.dburi) self.daskeys.sort() self.dasmapping = self.dasmgr.mapping self.dbs_url = self.dasmapping.dbs_url() self.dbs_global = self.dasmapping.dbs_global_instance() self.dbs_instances = self.dasmapping.dbs_instances() self.dasmapping.init_presentationcache() self.colors = {'das':gen_color('das')} for system in self.dasmgr.systems: self.colors[system] = gen_color(system) if not self.daskeyslist: keylist = [r for r in self.dasmapping.das_presentation_map()] keylist.sort(key=lambda r: r['das']) self.daskeyslist = keylist except ConnectionFailure as _err: tstamp = dastimestamp('') mythr = threading.current_thread() print("### MongoDB connection failure thread=%s, id=%s, time=%s" \ % (mythr.name, mythr.ident, 
tstamp)) except Exception as exc: print_exc(exc) self.dasmgr = None self.reqmgr = None self.dbs_url = None self.dbs_global = None self.dbs_instances = [] self.daskeys = [] self.colors = {} self.q_rewriter = None return # KWS and Query Rewriting failures are not fatal try: # init query rewriter, if needed if self.dasconfig['query_rewrite']['pk_rewrite_on']: self.q_rewriter = CMSQueryRewrite(self.repmgr, self.templatepage) except Exception as exc: print_exc(exc) self.q_rewriter = None @expose @checkargs(DAS_WEB_INPUTS) @tools.secmodv2() def redirect(self, **kwargs): """ Represent DAS redirect page """ dmsg = 'You do not have permission to access the resource requested.' msg = kwargs.get('reason', dmsg) if msg: msg = 'Reason: ' + msg page = self.templatepage('das_redirect', msg=msg) return self.page(page, response_div=False) @expose @checkargs(DAS_WEB_INPUTS) @tools.secmodv2() def dumpthreads(self, **kwargs): """ Represent DAS redirect page """ dumpstacks('web call', 'web frame') msg = 'Thread dump performed: %s' % time.strftime("%Y%m%d %H:%M:%S GMT", time.gmtime()) return self.page(msg, response_div=False) def bottom(self, response_div=True): """ Define footer for all DAS web pages """ tstamp = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime()) return self.templatepage('das_bottom', div=response_div, base=self.base, version=DAS.version, time=time) def page(self, content, ctime=None, response_div=True): """ Define footer for all DAS web pages """ page = self.top() page += content page += self.templatepage('das_bottom', ctime=ctime, base=self.base, version=DAS.version, div=response_div, time=time) return page @expose @checkargs(DAS_WEB_INPUTS + ['section', 'highlight']) @tools.secmodv2() def faq(self, **kwargs): """ represent DAS FAQ. 
""" section = kwargs.get('section', None) highlight = kwargs.get('highlight', None) guide = self.templatepage('dbsql_vs_dasql', operators=', '.join(das_operators())) daskeys = self.templatepage('das_keys', daskeys=self.daskeyslist) page = self.templatepage('das_faq', guide=guide, daskeys=daskeys, section=section, highlight=highlight, operators=', '.join(das_operators()), aggregators=', '.join(das_aggregators())) return self.page(page, response_div=False) @expose @tools.secmodv2() def cli(self): """ Serve DAS CLI file download. """ msg = 'Please use dasgoclient which is available in any CMSSW releases' return self.page(msg) # dasroot = '/'.join(__file__.split('/')[:-3]) # clifile = os.path.join(dasroot, 'DAS/tools/das_client.py') # return serve_file(clifile, content_type='text/plain') @expose @tools.secmodv2() def movetodas(self): "Placeholder page for DBS to DAS migration" style = \ "width:600px;margin-left:auto;margin-right:auto;padding-top:20px" page = """<div style="%s">""" % style page += "Dear user,<br/>DBS Data Discovery page is depricated.<br/>" page += "Please migrate to Data Aggregation Service located at" page += "<p>https://cmsweb.cern.ch/das/</p>" page += "<em>CMS HTTP group.</em>" page += "</div>""" return page @expose @tools.secmodv2() def opensearch(self): """ Serve DAS opensearch file. 
""" if self.base and self.base.find('http://') != -1: base = self.base else: base = 'http://cmsweb.cern.ch/das' desc = self.templatepage('das_opensearch', base=base) cherrypy.response.headers['Content-Type'] = \ 'application/opensearchdescription+xml' return desc @expose @checkargs(DAS_WEB_INPUTS) @tools.secmodv2() def keys(self, **kwds): """ Show DAS keys and their attibutes """ adict = {} for row in self.dasmgr.keylearning.attributes(): try: qpat = row.get('query_pat', []) key, attr = row['member'].split('.', 1) except: continue if key in adict: vdict = adict[key] if attr in vdict: vdict[attr] += qpat else: vdict[attr] = qpat adict[key] = vdict else: adict[key] = {attr: qpat} view = kwds.get('view', '') if view == 'json': return json.dumps(adict) page = self.templatepage('das_keys_attrs', attrs=adict) return self.page(page, response_div=False) @expose @checkargs(DAS_WEB_INPUTS) @tools.secmodv2() def services(self): """ represent DAS services """ dasdict = {} daskeys = set() dasmapkeys = list(self.dasmgr.mapping.dasmapscache.keys()) dasmapkeys.sort() for key in dasmapkeys: srv, urn = key if srv not in self.dasmgr.systems: continue entry = self.dasmgr.mapping.dasmapscache[key] tmpdict = {} for item in entry['das_map']: dkey = item['das_key'] rkey = item['rec_key'] daskeys.add(dkey) vlist = tmpdict.get(dkey, []) + [rkey] tmpdict[dkey] = list(set(vlist)) apis = [] if srv in dasdict: vdict = dasdict[srv] okeys = vdict['keys'] apis = vdict['apis'] + [urn] for kkk, vvv in okeys.items(): vlist = tmpdict.get(kkk, []) + vvv tmpdict[kkk] = list(set(vlist)) else: apis = [urn] vdict = dict(keys=dict(tmpdict), apis=apis) dasdict[srv] = vdict mapreduce = [r for r in self.dasmgr.rawcache.get_map_reduce()] page = self.templatepage('das_services', dasdict=dasdict, dbses=self.dbs_instances, dbs_global=self.dbs_global, daskeys=list(daskeys), mapreduce=mapreduce, urllib=urllib) return self.page(page, response_div=False) @expose @checkargs(DAS_WEB_INPUTS) def nsystems(self): """ 
Return number of systems participating in DAS """ systems = self.dasmgr.mapping.list_systems() return "DAS systems %s" % ','.join(systems) @expose @checkargs(DAS_WEB_INPUTS) @tools.secmodv2() def api(self, system, name): """ Return DAS mapping record about provided API. """ record = self.dasmgr.mapping.api_info(system, name) page = "<b>DAS mapping record</b>" page += das_json_full(record) return self.page(page, response_div=False) @expose @checkargs(DAS_WEB_INPUTS) @tools.secmodv2() def default(self, *args, **kwargs): """ Default method. """ return self.index(args, kwargs) def adjust_input(self, kwargs): """ Adjust user input wrt common DAS keyword patterns, e.g. /Zee/*/* -> dataset=*Zee*, T1_US -> site=T1_US. More ambiguous input (such as Zee -> dataset=*Zee*) is however left to be handled by the keyword search. This is active only if adjust_input is set in DAS server configuration. """ if not self.adjust: return uinput = kwargs.get('input', '') inst = kwargs.get('instance', self.dbs_global) kwargs['input'] = identify_apparent_query_patterns(uinput, inst) def _get_dbsmgr(self, inst): """ Given a string representation of DBS instance, returns DBSManager instance which "knows" how to look up datasets """ mgr = None # instance selection shall be more clean if not self.dataset_daemon: return mgr for dbs_url, dbs_inst in self.dbsmgr.keys(): if dbs_inst == inst: return self.dbsmgr[(dbs_url, dbs_inst)] return mgr def _get_kws_host(self): """ gets the host for keyword search from config. default is same server """ return self.dasconfig['load_balance']['kws_host'] def _get_autocompl_host(self): """ gets the host for autocompletion from config. 
default is same server """ conf = self.dasconfig.get('load_balance', {}) return conf.get('autocompletion_host', '') def is_kws_enabled(self): """ is keyword search client (ajax request) enabled """ return self.dasconfig['keyword_search']['kws_on'] def is_kws_service_enabled(self): """ is keyword search service (response to ajax call) enabled """ return self.dasconfig['keyword_search']['kws_service_on'] def generate_dasquery(self, uinput, inst, html_mode=True, qcache=0): """ Check provided input as valid DAS input query. Returns status and content (either error message or valid DASQuery) :param uinput: user's input :param inst: DBS instance :param html_mode: whether errors shall be output in html """ def error_msg(msg, show_kws=False, tmpl='das_ambiguous', **kwargs): """ Helper function which renders an error template, default is das_ambiguous, but can be overriden via tmpl param. Template has two versions: html and text for CLI. The template is passed with msg, base, guide, and **kwargs. """ # TODO: this shall be done by inheriting a parent template # TODO: no header/footer? 
guide = self.templatepage('dbsql_vs_dasql', operators=', '.join(das_operators())) # render keyword search loader, if needed kws = '' if show_kws: kws = self.templatepage('kwdsearch_via_ajax', uinput=uinput, jsonize=jsonize, url_extend_params_as_dict=url_extend_params_as_dict, inst=inst or self.dbs_global, kws_host=self._get_kws_host()) # render the appropriate template (html vs text mode) page = self.templatepage(tmpl + ('_txt' if not html_mode else ''), msg=msg, base=self.base, guide=guide, kws_enabled=show_kws, kws=kws, **kwargs) return page if not uinput: return 1, error_msg('No input query') # Generate a DASQuery object, if it fails we catch the exception and # wrap it for upper layer (web interface) try: dasquery = DASQuery(uinput, instance=inst, qcache=qcache) except WildcardMultipleMatchesException as err: # TODO: hints could be shown here also, but it makes no sense, as # they are shown only when no matches are found if isinstance(err.options.values, list) and err.options.values: return 1, error_msg(str(err), tmpl='das_wildcard_err', suggest=err.options.values, url_extend_params=url_extend_params) return 1, error_msg(str(err), tmpl='das_wildcard_err', url_extend_params=url_extend_params) except WildcardMatchingException as err: kwds = {'input':uinput, 'instance':inst} hints = self.hint_datasets(kwds) page = error_msg(str(err)) for hint in hints: page += self.templatepage('hint', url_extend_params=url_extend_params, hint=hint, base=self.base, dbs=self.dbs_global) return 1, page except Exception as err: # show multiple dataset matches for 1 keyword queries if hasattr(response, 'dataset_matches_msg'): return 1, error_msg(response.dataset_matches_msg, show_kws=self.is_kws_enabled()) # for non Wildcard parsing errors, show the Keyword Search return 1, error_msg(str(err), show_kws=self.is_kws_enabled()) if dasquery.error: return 1, error_msg(dasquery.error) # DAS query validation if isinstance(uinput, dict): # DASQuery w/ {'spec':{'_id:id}} pass elif 
uinput.find('queries') != -1: pass elif uinput.find('records') != -1: pass else: # normal user DAS query try: service_map = dasquery.service_apis_map() except Exception as exc: msg = 'Fail to obtain service API map for this DASQuery' print(msg) print_exc(exc) return 1, error_msg(msg) if not service_map: return 1, error_msg('Unable to resolve the query over the ' 'available services: %s' % dasquery) return 0, dasquery @expose @checkargs(DAS_WEB_INPUTS) # @tools.secmodv2() def index(self, *args, **kwargs): """ represents DAS web interface. It uses das_searchform template for input form and yui_table for output Table widget. """ uinput = getarg(kwargs, 'input', '') return self.page(self.form(uinput=uinput, cards=True)) def form(self, uinput='', instance=None, view='list', cards=False): """ provide input DAS search form """ # TODO: rename into search_form()? (template is also called like this if "'" in uinput: # e.g. file.creation_date>'20120101 12:01:01' uinput = uinput.replace("'", '"') if not instance: instance = self.dbs_global hcards = help_cards(self.base) width = 900 height = 220 cards = self.templatepage('das_cards', base=self.base, show=cards, \ width=width, height=height, max_width=len(hcards)*width, \ cards=hcards, enumerate=enumerate) daskeys = self.templatepage('das_keys', daskeys=self.daskeyslist) page = self.templatepage('das_searchform', input=uinput, \ init_dbses=list(self.dbs_instances), daskeys=daskeys, \ base=self.base, instance=instance, view=view, cards=cards, autocompl_host=json.dumps(self._get_autocompl_host()) ) return page @expose @tools.secmodv2() def error(self, msg, wrap=True): """ Show error message. 
""" page = self.templatepage('das_error', msg=str(msg)) if wrap: page = self.page(self.form() + page) return page @expose @checkargs(DAS_WEB_INPUTS) @tools.secmodv2() def gridfs(self, **kwargs): """ Retieve records from GridFS """ time0 = time.time() if 'fid' not in kwargs: code = web_code('No file id') raise HTTPError(500, 'DAS error, code=%s' % code) fid = kwargs.get('fid') data = {'status':'requested', 'fid':fid} try: fds = self.gfs.get(ObjectId(fid)) return fds.read() except Exception as exc: print_exc(exc) code = web_code('Exception') raise HTTPError(500, 'DAS error, code=%s' % code) data['ctime'] = time.time() - time0 return json.dumps(data) @expose @checkargs(DAS_WEB_INPUTS) @tools.secmodv2() def records(self, *args, **kwargs): """ Retieve all records id's. """ try: recordid = None if args: recordid = args[0] spec = {'_id':ObjectId(recordid)} fields = None query = dict(fields=fields, spec=spec) elif kwargs and '_id' in kwargs: spec = {'_id': ObjectId(kwargs['_id'])} fields = None query = dict(fields=fields, spec=spec) else: # return all ids query = dict(fields=None, spec={}) res = '' time0 = time.time() idx = getarg(kwargs, 'idx', 0) limit = getarg(kwargs, 'limit', 50) coll = kwargs.get('collection', 'merge') view = kwargs.get('view', '') if view == 'json': res = [] inst = kwargs.get('instance', self.dbs_global) form = self.form(uinput="") check, content = self.generate_dasquery(query, inst) if check: return self.page(form + content, ctime=time.time()-time0) dasquery = content # returned content is valid DAS query nresults = self.dasmgr.rawcache.nresults(dasquery, coll) gen = self.dasmgr.rawcache.get_from_cache\ (dasquery, idx=idx, limit=limit, collection=coll) if recordid: # we got id for row in gen: if view == 'json': res.append(row) else: res += das_json(dasquery, row) else: for row in gen: rid = row['_id'] del row['_id'] res += self.templatepage('das_record', \ id=rid, collection=coll, daskeys=', '.join(row)) if recordid: page = res else: url = 
'/das/records?' if nresults: page = self.templatepage('das_pagination', \ nrows=nresults, idx=idx, limit=limit, url=url, \ cgi=cgi, str=str) else: page = 'No results found, nresults=%s' % nresults page += res ctime = (time.time()-time0) if view == 'json': return json.dumps(res) page = self.page(form + page, ctime=ctime) return page except Exception as exc: print_exc(exc) return self.error(gen_error_msg(kwargs)) @jsonstreamer def datastream(self, kwargs): """Stream DAS data into JSON format""" head = kwargs.get('head', dict(timestamp=time.time())) if 'mongo_query' not in head: head['mongo_query'] = head['dasquery'].mongo_query \ if 'dasquery' in head else {} if 'dasquery' in head: del head['dasquery'] if 'args' in head: del head['args'] data = kwargs.get('data', []) if self.check_clients: # update client version cli, cli_msg = check_client_version() head.update({'client': cli, 'client_message': cli_msg}) return head, data def hint_datasets(self, kwargs): "Use hint functions to find datasets in non-default DBS istances" query = kwargs.get('input', '').strip() dbsinst = kwargs.get('instance', self.dbs_global) hint_functions = [hint_dataset_case_insensitive, hint_dataset_in_other_insts, ] hints = (hint(query, dbsinst) for hint in hint_functions) hints = [r for r in hints if r and r.get('results')] return hints def get_data(self, kwargs): """ Invoke DAS workflow and get data from the cache. 
""" head = dict(timestamp=time.time()) head['args'] = kwargs uinput = kwargs.get('input', '') inst = kwargs.get('instance', self.dbs_global) idx = getarg(kwargs, 'idx', 0) limit = getarg(kwargs, 'limit', 0) # do not impose limit coll = kwargs.get('collection', 'merge') status = kwargs.get('status') error = kwargs.get('error') reason = kwargs.get('reason') dasquery = kwargs.get('dasquery', None) time0 = time.time() if dasquery: dasquery = DASQuery(dasquery, instance=inst) if dasquery.error: return self.page(form + dasquery.error, ctime=time.time()-time0) else: check, content = \ self.generate_dasquery(uinput, inst, html_mode=False) if check: head.update({'status': 'fail', 'reason': content, 'ctime': time.time()-time0, 'input': uinput}) data = [] return head, data dasquery = content # returned content is valid DAS query try: nres = self.dasmgr.nresults(dasquery, coll) data = \ self.dasmgr.get_from_cache(dasquery, idx, limit) # check that we got what we expected data = [r for r in data] if nres and not len(data): for retry in range(1, 3, 5): msg = 'retry in %s sec' % retry dasprint(dastimestamp('DAS WARNING '), msg, dasquery) time.sleep(retry) # retry one more time data = \ self.dasmgr.get_from_cache(dasquery, idx, limit) data = [r for r in data] if len(data): break if nres and not len(data): msg = 'fail to get all data for %s, nres=%s, len(data)=%s' \ % (dasquery, nres, len(data)) dasprint(dastimestamp('DAS WARNING '), msg) status = 'fail' reason = 'Fail to retrieve data from DAS cache, please retry' if dasquery.aggregators: # aggregators split DAS record into sub-system and then # apply aggregator functions, therefore we need to correctly # account for nresults. Resolve generator into list and take # its length as nresults value. 
data = [r for r in data] nres = len(data) if error: # DAS record contains an error status = 'error' head.update({'status':status, 'nresults':nres, 'ctime': time.time()-time0, 'dasquery': dasquery}) except Exception as exc: status = 'fail' reason = str(exc) print_exc(exc) head.update({'status': status, 'ctime': time.time()-time0, 'dasquery': dasquery}) data = [] head.update({'incache':self.dasmgr.incache(dasquery, coll='cache'), 'apilist':self.dasmgr.apilist(dasquery)}) if reason: head.update({'reason': reason}) if status != 'ok': head.update(self.info()) # check if query had dataset input and returned no results # then run hint functions to find dataset in other DBS instances mquery = dasquery.mongo_query empty = False for item in data: if 'dataset.name' in mquery['spec'] and 'dataset' in mquery['fields'] \ and 'result' not in item: if not item['dataset']: empty = True break if empty: # if no results found add dataset from other DBS instances hints = self.hint_datasets(kwargs) for item in data: item.update({'hints': hints}) return head, data def info(self): "Return status of DAS server" info = {'nrequests': self.reqmgr.size(), 'nworkers': self.taskmgr.nworkers(), 'dasweb': self.reqmgr.status()} if self.dasmgr and self.dasmgr.taskmgr: info.update({'dascore': self.dasmgr.taskmgr.status()}) return dict(das_server=info) def busy(self): """ Check server load and report busy status if nrequests - nworkers > queue limit """ nrequests = self.reqmgr.size() if (nrequests - self.taskmgr.nworkers()) > self.queue_limit: msg = '#request=%s, queue_limit=%s, #workers=%s' \ % (nrequests, self.taskmgr.nworkers(), self.queue_limit) dasprint(dastimestamp('DAS WEB SERVER IS BUSY '), msg) return True return False def busy_page(self, uinput=None): """DAS server busy page layout""" page = "<h3>DAS server is busy, please try later</h3>" form = self.form(uinput) return self.page(form + page) def _is_web_request(self, view): """ returns whether the current view mode is not web """ # first, 
check for explicit output type (view) if view in ['json', 'xml', 'plain']: return False # check accept header - e.g. das client only provides accept header accepts = cherrypy.request.headers.elements('Accept') non_html_accepts = ['application/json'] other_accepted = [a for a in accepts if a.value not in non_html_accepts] # if only non html content types are accepted we are in non html mode if not other_accepted and accepts: return False return True def empty_return(self, dasquery, status='busy', reason=None): "Return header/data when DAS server is busy" if not reason: reason = 'DAS server is busy' reason += ', #requests=%s, #workers=%s, queue size=%s' \ % (self.reqmgr.size(), self.taskmgr.nworkers(), self.queue_limit) head = dict(timestamp=time.time()) head.update({'status': status, 'reason': reason, 'ctime':0}) data = [] dasprint(dastimestamp('DAS INFO '), dasquery, 'server status=%s'%status, reason) return self.datastream(dict(head=head, data=data)) @expose @checkargs(DAS_WEB_INPUTS) @tools.secmodv2() def cache(self, **kwargs): """ DAS web cache interface. Fire up new process for new requests and record its pid. The client is in charge to keep track of pid. The new process uses DAS core call to request the data into cache. Since query are cached the repeated call with the same query has no cost to DAS core. """ # do not allow caching set_no_cache_flags() # if busy return right away if self.busy(): return self.empty_return(kwargs) uinput = kwargs.get('input', '').strip() check_query(uinput) if not uinput: head = {'status': 'fail', 'reason': 'No input found', 'args': kwargs, 'ctime': 0, 'input': uinput} data = [] return self.datastream(dict(head=head, data=data)) self.adjust_input(kwargs) pid = kwargs.get('pid', '') inst = kwargs.get('instance', self.dbs_global) uinput = kwargs.get('input', '') view = kwargs.get('view', 'list') qcache = kwargs.get('qcache', 0) data = [] # textual views need text only error messages... 
check, content = self.generate_dasquery(uinput, inst, html_mode=self._is_web_request(view), qcache=qcache) if check: head = dict(timestamp=time.time()) head.update({'status': 'fail', 'reason': 'Can not interpret the query'+ \ ' (while creating DASQuery)', 'ctime': 0}) if not self._is_web_request(view): head['error_details'] = content head['reason'] = head['reason'] + '\n\n' + content return self.datastream(dict(head=head, data=data)) dasquery = content # returned content is valid DAS query status, error, reason = self.dasmgr.get_status(dasquery) kwargs.update({'status':status, 'error':error, 'reason':reason}) if not pid: pid = dasquery.qhash if status == None and not self.reqmgr.has_pid(pid): # submit new request uid = cherrypy.request.headers.get('Remote-Addr') if hasattr(cherrypy.request, 'user'): uid = cherrypy.request.user.get('dn', None) _evt, pid = self.taskmgr.spawn(\ self.dasmgr.call, dasquery, uid=uid, pid=dasquery.qhash) self.reqmgr.add(pid, kwargs) return pid if status == 'ok': self.reqmgr.remove(pid) self.taskmgr.remove(pid) kwargs['dasquery'] = dasquery head, data = self.get_data(kwargs) return self.datastream(dict(head=head, data=data)) kwargs['dasquery'] = dasquery.storage_query if not self.pid_pat.match(str(pid)) or len(str(pid)) != 32: self.reqmgr.remove(pid) self.taskmgr.remove(pid) return self.empty_return(dasquery, 'fail', 'Invalid pid') elif self.taskmgr.is_alive(pid): return pid elif status == None: # DAS was busy and query expired since status==None if not self.taskmgr.is_alive(pid) and self.reqmgr.has_pid(pid): self.reqmgr.remove(pid) self.taskmgr.remove(pid) return self.empty_return(dasquery, 'fail', 'request expired') return pid else: # process is done, get data self.reqmgr.remove(pid) self.taskmgr.remove(pid) head, data = self.get_data(kwargs) return self.datastream(dict(head=head, data=data)) def get_page_content(self, kwargs, complete_msg=True): """Retrieve page content for provided set of parameters""" html_views = ['list', 'table'] 
page = '' try: view = kwargs.get('view', 'list') if view == 'plain': if 'limit' in kwargs: del kwargs['limit'] if view in ['json', 'xml', 'plain'] and complete_msg: page = 'Request completed. Reload the page ...' else: head, data = self.get_data(kwargs) allowed_views = ['list', 'table', 'plain', 'xml', 'json'] if view not in allowed_views: raise func = getattr(self, view + "view") page = func(head, data) except HTTPError as _err: raise except Exception as exc: print_exc(exc) msg = gen_error_msg(kwargs) page = self.templatepage('das_error', msg=msg) return page @expose @tools.secmodv2() def download(self, lfn): "DAS download page for given LFN" page = self.templatepage('filemover', lfn=lfn) return self.page(page, response_div=False) @expose @tools.secmodv2() def makepy(self, dataset, instance): """ Request to create CMSSW py snippet for a given dataset """ pat = re.compile('/.*/.*/.*') if not pat.match(dataset): msg = 'Invalid dataset name' return self.error(msg) query = "file dataset=%s instance=%s | grep file.name" \ % (dataset, instance) try: data = self.dasmgr.result(query, idx=0, limit=0) except Exception as exc: print_exc(exc) msg = 'Exception: %s\n' % str(exc) msg += 'Unable to retrieve data for query=%s' % query return self.error(msg) lfns = [] for rec in data: filename = DotDict(rec).get('file.name') if filename not in lfns: lfns.append(filename) page = self.templatepage('das_files_py', lfnList=lfns, pfnList=[], isinstance=isinstance, list=list) cherrypy.response.headers['Content-Type'] = "text/plain" return page @expose @checkargs(DAS_WEB_INPUTS) @tools.secmodv2() def request(self, **kwargs): """ Request data from DAS cache. 
""" # do not allow caching set_no_cache_flags() uinput = kwargs.get('input', '').strip() check_query(uinput) if not uinput: kwargs['reason'] = 'No input found' return self.redirect(**kwargs) # if busy return right away if self.busy(): return self.busy_page(uinput) time0 = time.time() self.adjust_input(kwargs) view = kwargs.get('view', 'list') qcache = kwargs.get('qcache', 0) if 'instance' in uinput: form = self.form(uinput=uinput, view=view) content = 'On DAS web UI please use drop-down menu to specify DBS' content += ' instance to avoid ambiguity. ' content += 'To proceed please clear your input query.' return self.page(form + '<div class="box_red">%s</div>' % content) else: inst = kwargs.get('instance', self.dbs_global) uinput = kwargs.get('input', '') form = self.form(uinput=uinput, instance=inst, view=view) check, content = self.generate_dasquery(uinput, inst, qcache=qcache) if check: if view == 'list' or view == 'table': return self.page(form + content, ctime=time.time()-time0) else: return content dasquery = content # returned content is valid DAS query status, error, reason = self.dasmgr.get_status(dasquery) kwargs.update({'status':status, 'error':error, 'reason':reason}) pid = dasquery.qhash if status is None: # process new request kwargs['dasquery'] = dasquery.storage_query uid = cherrypy.request.headers.get('Remote-Addr') if hasattr(cherrypy.request, 'user'): uid = cherrypy.request.user.get('dn', None) _evt, pid = self.taskmgr.spawn(self.dasmgr.call, dasquery, uid=uid, pid=dasquery.qhash) self.reqmgr.add(pid, kwargs) elif status == 'ok' or status == 'fail': self.reqmgr.remove(pid) self.taskmgr.remove(pid) # check if query can be rewritten via nested PK query rew_msg = self.q_rewriter and self.q_rewriter.check_fields(dasquery) if rew_msg: content = self.templatepage('das_error', msg=rew_msg) return self.page(form + content, ctime=time.time()-time0) kwargs['dasquery'] = dasquery page = self.get_page_content(kwargs, complete_msg=False) ctime = 
(time.time()-time0) if view == 'list' or view == 'table': return self.page(form + page, ctime=ctime) return page if self.taskmgr.is_alive(pid): page = self.templatepage('das_check_pid', method='check_pid', uinput=uinput, view=view, urllib=urllib, base=self.base, pid=pid, interval=self.interval) elif status == None: # DAS was busy and query expired since status==None if not self.taskmgr.is_alive(pid) and self.reqmgr.has_pid(pid): self.reqmgr.remove(pid) self.taskmgr.remove(pid) return self.empty_return(dasquery, 'fail', 'request expired') page = self.templatepage('das_check_pid', method='check_pid', uinput=uinput, view=view, urllib=urllib, base=self.base, pid=pid, interval=self.interval) else: self.reqmgr.remove(pid) self.taskmgr.remove(pid) page = self.get_page_content(kwargs) ctime = (time.time()-time0) return self.page(form + page, ctime=ctime) @expose @tools.secmodv2() def status(self): """Return list of all current requests in DAS queue""" requests = [r for r in self.reqmgr.items()] page = self.templatepage('das_status', requests=requests, time=time) sdict = self.dasmgr.status() sdict['web'] = self.taskmgr.status() dasprint(dastimestamp('DAS INFO '), "web TaskManager", sdict['web']) for key, val in sdict.items(): dasprint(dastimestamp('DAS INFO '), "%s TaskManager %s" % (key, val)) page += '<h3>Services</h3>' def dump(idict): "Dump input dict" return ', '.join(['<em>%s:</em> %s' % (k, idict[k]) for k in sorted(idict)]) for key, val in sdict.items(): page += '<div>' stats = ', '.join([dump(v) for v in val.values()]) page += '<b>%s</b>: %s' % (key, stats) page += '</div>' return self.page(page) @expose @checkargs(['pid']) @tools.secmodv2() def check_pid(self, pid): """ Check status of given pid. 
This is a server callback function for ajaxCheckPid, see js/ajax_utils.js """ # do not allow caching set_no_cache_flags() img = '<img src="%s/images/loading.gif" alt="loading"/>' % self.base page = '' try: if self.taskmgr.is_alive(pid): page = img + " processing PID=%s" % pid else: # at this point we don't know if request arrived to this host # or it was processed. To distinguish the case we'll ask # request manager for that pid if self.reqmgr.has_pid(pid): self.reqmgr.remove(pid) self.taskmgr.remove(pid) page = 'Request PID=%s is completed' % pid page += ', please wait for results to load' else: # there're no request on this server, re-initiate it ref = cherrypy.request.headers.get('Referer', None) if ref: url = urlparse(ref) params = dict(parse_qsl(url.query)) return self.request(**params) else: msg = 'No referer in cherrypy.request.headers' msg += '\nHeaders: %s' % cherrypy.request.headers dasprint(dastimestamp('DAS WEB ERROR '), msg) except Exception as err: msg = 'check_pid fails for pid=%s' % pid dasprint(dastimestamp('DAS WEB ERROR '), msg) print_exc(err) self.reqmgr.remove(pid) self.taskmgr.remove(pid) return self.error(gen_error_msg({'pid':pid}), wrap=False) return page def listview(self, head, data): """DAS listview data representation""" return self.repmgr.listview(head, data) def tableview(self, head, data): """DAS tabular view data representation""" return self.repmgr.tableview(head, data) def plainview(self, head, data): """DAS plain view data representation""" return self.repmgr.plainview(head, data) def xmlview(self, head, data): """DAS XML data representation""" return self.repmgr.xmlview(head, data) def jsonview(self, head, data): """DAS JSON data representation""" return self.repmgr.jsonview(head, data) @exposedasjson @enable_cross_origin @checkargs(['query', 'dbs_instance']) @tools.secmodv2() def autocomplete(self, **kwargs): """ Provides autocomplete functionality for DAS web UI. 
""" query = kwargs.get("query", "").strip() result = autocomplete_helper(query, self.dasmgr, self.daskeys) dataset = [r for r in result if r['value'].find('dataset=')!=-1] dbsinst = kwargs.get('dbs_instance', self.dbs_global) if self.dataset_daemon and len(dataset): dbsmgr = self._get_dbsmgr(dbsinst) # we shall autocomplete the last token so queries like # file dataset=/ZMM/.. are autocompleted prefix = '' if ' ' in query: prefix = ' '.join(query.split()[:-1]) + ' ' print('prefix=', prefix) query = query.split()[-1] if query.find('dataset=') != -1: query = query.replace('dataset=', '') for row in dbsmgr.find(query): result.append({'css': 'ac-info', 'value': prefix + 'dataset=%s' % row, 'info': 'dataset'}) return result
class DASWebService(DASWebManager): """ DAS web service interface. """ def __init__(self, dasconfig): DASWebManager.__init__(self, dasconfig) config = dasconfig["web_server"] self.pid_pat = re.compile(r"^[a-z0-9]{32}") self.base = config["url_base"] self.interval = config.get("status_update", 2500) self.engine = config.get("engine", None) self.check_clients = config.get("check_clients", False) nworkers = config["web_workers"] self.hot_thr = config.get("hot_threshold", 3000) self.dasconfig = dasconfig self.dburi = self.dasconfig["mongodb"]["dburi"] self.lifetime = self.dasconfig["mongodb"]["lifetime"] self.queue_limit = config.get("queue_limit", 50) qtype = config.get("qtype", "Queue") if qtype not in ["Queue", "PriorityQueue"]: msg = "Wrong queue type, qtype=%s" % qtype raise Exception(msg) if self.engine: thr_name = "DASWebService:PluginTaskManager" self.taskmgr = PluginTaskManager(bus=self.engine, nworkers=nworkers, name=thr_name, qtype=qtype) self.taskmgr.subscribe() else: thr_name = "DASWebService:TaskManager" self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name, qtype=qtype) self.adjust = config.get("adjust_input", False) self.dasmgr = None # defined at run-time via self.init() self.reqmgr = None # defined at run-time via self.init() self.daskeys = [] # defined at run-time via self.init() self.colors = {} # defined at run-time via self.init() self.dbs_url = None # defined at run-time via self.init() self.dbs_global = None # defined at run-time via self.init() self.kws = None # defined at run-time via self.init() self.q_rewriter = None # defined at run-time via self.init() self.dataset_daemon = config.get("dbs_daemon", False) self.dbsmgr = {} # dbs_urls vs dbs_daemons, defined at run-time self.daskeyslist = [] # list of DAS keys self.init() # Monitoring thread which performs auto-reconnection thname = "dascore_monitor" start_new_thread(thname, dascore_monitor, ({"das": self.dasmgr, "uri": self.dburi}, self.init, 5)) def dbs_daemon(self, config): """Start 
DBS daemon if it is requested via DAS configuration""" try: main_dbs_url = self.dbs_url dbs_urls = [] print "### DBS URL:", self.dbs_url print "### DBS instances:", self.dbs_instances if not self.dbs_url or not self.dbs_instances: return # just quit for inst in self.dbs_instances: dbs_urls.append((main_dbs_url.replace(self.dbs_global, inst), inst)) interval = config.get("dbs_daemon_interval", 3600) dbsexpire = config.get("dbs_daemon_expire", 3600) preserve_dbs_col = config.get("preserve_on_restart", False) dbs_config = {"expire": dbsexpire, "preserve_on_restart": preserve_dbs_col} if self.dataset_daemon: for dbs_url, inst in dbs_urls: dbsmgr = DBSDaemon(dbs_url, self.dburi, dbs_config) self.dbsmgr[(dbs_url, inst)] = dbsmgr def dbs_updater(_dbsmgr, interval): """DBS updater daemon""" while True: try: _dbsmgr.update() except: pass time.sleep(interval) print "### Start DBSDaemon for %s" % dbs_url thname = "dbs_updater:%s" % dbs_url start_new_thread(thname, dbs_updater, (dbsmgr, interval)) except Exception as exc: print_exc(exc) def init(self): """Init DAS web server, connect to DAS Core""" try: self.reqmgr = RequestManager(lifetime=self.lifetime) self.dasmgr = DASCore(engine=self.engine) self.repmgr = CMSRepresentation(self.dasconfig, self.dasmgr) self.daskeys = self.dasmgr.das_keys() self.gfs = db_gridfs(self.dburi) self.daskeys.sort() self.dasmapping = self.dasmgr.mapping self.dbs_url = self.dasmapping.dbs_url() self.dbs_global = self.dasmapping.dbs_global_instance() self.dbs_instances = self.dasmapping.dbs_instances() self.dasmapping.init_presentationcache() self.colors = {"das": gen_color("das")} for system in self.dasmgr.systems: self.colors[system] = gen_color(system) # get SiteDB from global scope self.sitedbmgr = SERVICES.get("sitedb2", None) # Start DBS daemon if self.dataset_daemon: self.dbs_daemon(self.dasconfig["web_server"]) if not self.daskeyslist: keylist = [r for r in self.dasmapping.das_presentation_map()] keylist.sort(key=lambda r: r["das"]) 
self.daskeyslist = keylist except ConnectionFailure as _err: tstamp = dastimestamp("") mythr = threading.current_thread() print "### MongoDB connection failure thread=%s, id=%s, time=%s" % (mythr.name, mythr.ident, tstamp) except Exception as exc: print_exc(exc) self.dasmgr = None self.reqmgr = None self.dbs_url = None self.dbs_global = None self.dbs_instances = [] self.daskeys = [] self.colors = {} self.q_rewriter = None return # KWS and Query Rewriting failures are not fatal try: # init query rewriter, if needed if self.dasconfig["query_rewrite"]["pk_rewrite_on"]: self.q_rewriter = CMSQueryRewrite(self.repmgr, self.templatepage) except Exception as exc: print_exc(exc) self.q_rewriter = None @expose @checkargs(DAS_WEB_INPUTS) def redirect(self, **kwargs): """ Represent DAS redirect page """ dmsg = "You do not have permission to access the resource requested." msg = kwargs.get("reason", dmsg) if msg: msg = "Reason: " + msg page = self.templatepage("das_redirect", msg=msg) return self.page(page, response_div=False) def bottom(self, response_div=True): """ Define footer for all DAS web pages """ return self.templatepage("das_bottom", div=response_div, base=self.base, version=DAS.version) def page(self, content, ctime=None, response_div=True): """ Define footer for all DAS web pages """ page = self.top() page += content page += self.templatepage("das_bottom", ctime=ctime, base=self.base, version=DAS.version, div=response_div) return page @expose @checkargs(DAS_WEB_INPUTS + ["section", "highlight"]) def faq(self, **kwargs): """ represent DAS FAQ. 
""" section = kwargs.get("section", None) highlight = kwargs.get("highlight", None) guide = self.templatepage("dbsql_vs_dasql", operators=", ".join(das_operators())) daskeys = self.templatepage("das_keys", daskeys=self.daskeyslist) page = self.templatepage( "das_faq", guide=guide, daskeys=daskeys, section=section, highlight=highlight, operators=", ".join(das_operators()), aggregators=", ".join(das_aggregators()), ) return self.page(page, response_div=False) @expose def cli(self): """ Serve DAS CLI file download. """ dasroot = "/".join(__file__.split("/")[:-3]) clifile = os.path.join(dasroot, "DAS/tools/das_client.py") return serve_file(clifile, content_type="text/plain") @expose def movetodas(self): "Placeholder page for DBS to DAS migration" style = "width:600px;margin-left:auto;margin-right:auto;padding-top:20px" page = """<div style="%s">""" % style page += "Dear user,<br/>DBS Data Discovery page is depricated.<br/>" page += "Please migrate to Data Aggregation Service located at" page += "<p>https://cmsweb.cern.ch/das/</p>" page += "<em>CMS HTTP group.</em>" page += "</div>" "" return page @expose def opensearch(self): """ Serve DAS opensearch file. 
""" if self.base and self.base.find("http://") != -1: base = self.base else: base = "http://cmsweb.cern.ch/das" desc = self.templatepage("das_opensearch", base=base) cherrypy.response.headers["Content-Type"] = "application/opensearchdescription+xml" return desc @expose @checkargs(DAS_WEB_INPUTS) def services(self): """ represent DAS services """ dasdict = {} daskeys = set() dasmapkeys = self.dasmgr.mapping.dasmapscache.keys() dasmapkeys.sort() for key in dasmapkeys: srv, urn = key if srv not in self.dasmgr.systems: continue entry = self.dasmgr.mapping.dasmapscache[key] tmpdict = {} for item in entry["das_map"]: dkey = item["das_key"] rkey = item["rec_key"] daskeys.add(dkey) vlist = tmpdict.get(dkey, []) + [rkey] tmpdict[dkey] = list(set(vlist)) apis = [] if srv in dasdict: vdict = dasdict[srv] okeys = vdict["keys"] apis = vdict["apis"] + [urn] for kkk, vvv in okeys.iteritems(): vlist = tmpdict.get(kkk, []) + vvv tmpdict[kkk] = list(set(vlist)) else: apis = [urn] vdict = dict(keys=dict(tmpdict), apis=apis) dasdict[srv] = vdict mapreduce = [r for r in self.dasmgr.rawcache.get_map_reduce()] page = self.templatepage("das_services", dasdict=dasdict, daskeys=list(daskeys), mapreduce=mapreduce) return self.page(page, response_div=False) @expose @checkargs(DAS_WEB_INPUTS) def api(self, system, name): """ Return DAS mapping record about provided API. """ record = self.dasmgr.mapping.api_info(system, name) page = "<b>DAS mapping record</b>" page += das_json_full(record) return self.page(page, response_div=False) @expose @checkargs(DAS_WEB_INPUTS) def default(self, *args, **kwargs): """ Default method. """ return self.index(args, kwargs) def adjust_input(self, kwargs): """ Adjust user input wrt common DAS keyword patterns, e.g. /Zee/*/* -> dataset=*Zee*, T1_US -> site=T1_US. More ambiguous input (such as Zee -> dataset=*Zee*) is however left to be handled by the keyword search. This is active only if adjust_input is set in DAS server configuration. 
""" if not self.adjust: return uinput = kwargs.get("input", "") inst = kwargs.get("instance", self.dbs_global) kwargs["input"] = identify_apparent_query_patterns(uinput, inst) def _get_dbsmgr(self, inst): """ Given a string representation of DBS instance, returns DBSManager instance which "knows" how to look up datasets """ mgr = None # instance selection shall be more clean if not self.dataset_daemon: return mgr for dbs_url, dbs_inst in self.dbsmgr.keys(): if dbs_inst == inst: return self.dbsmgr[(dbs_url, dbs_inst)] return mgr def _get_kws_host(self): """ gets the host for keyword search from config. default is same server """ return self.dasconfig["load_balance"]["kws_host"] def _get_autocompl_host(self): """ gets the host for autocompletion from config. default is same server """ conf = self.dasconfig.get("load_balance", {}) return conf.get("autocompletion_host", "") def is_kws_enabled(self): """ is keyword search client (ajax request) enabled """ return self.dasconfig["keyword_search"]["kws_on"] def is_kws_service_enabled(self): """ is keyword search service (response to ajax call) enabled """ return self.dasconfig["keyword_search"]["kws_service_on"] def generate_dasquery(self, uinput, inst, html_mode=True): """ Check provided input as valid DAS input query. Returns status and content (either error message or valid DASQuery) :param uinput: user's input :param inst: DBS instance :param html_mode: whether errors shall be output in html """ def error_msg(msg, show_kws=False, tmpl="das_ambiguous", **kwargs): """ Helper function which renders an error template, default is das_ambiguous, but can be overriden via tmpl param. Template has two versions: html and text for CLI. The template is passed with msg, base, guide, and **kwargs. 
""" guide = self.templatepage("dbsql_vs_dasql", operators=", ".join(das_operators())) # render keyword search loader, if needed kws = "" if show_kws: kws = self.templatepage( "kwdsearch_via_ajax", uinput=uinput, inst=inst or self.dbs_global, kws_host=self._get_kws_host() ) # render the appropriate template (html vs text mode) page = self.templatepage( tmpl + ("_txt" if not html_mode else ""), msg=msg, base=self.base, guide=guide, kws_enabled=show_kws, kws=kws, **kwargs ) return page if not uinput: return 1, error_msg("No input query") # Generate a DASQuery object, if it fails we catch the exception and # wrap it for upper layer (web interface) try: dasquery = DASQuery(uinput, instance=inst) except WildcardMultipleMatchesException as err: das_parser_error(uinput, str(err).replace("\n", "")) return 1, error_msg(str(err), tmpl="das_wildcard_err", suggest=err.options.values) except WildcardMatchingException as err: das_parser_error(uinput, str(type(err)) + " " + str(err)) return 1, error_msg(str(err)) except Exception as err: das_parser_error(uinput, str(type(err)) + " " + str(err)) # show multiple dataset matches for 1 keyword queries if hasattr(response, "dataset_matches_msg"): return 1, error_msg(response.dataset_matches_msg, show_kws=self.is_kws_enabled()) # for non Wildcard parsing errors, show the Keyword Search return 1, error_msg(str(err), show_kws=self.is_kws_enabled()) # DAS query validation if isinstance(uinput, dict): # DASQuery w/ {'spec':{'_id:id}} pass elif uinput.find("queries") != -1: pass elif uinput.find("records") != -1: pass else: # normal user DAS query try: service_map = dasquery.service_apis_map() except Exception as exc: msg = "Fail to obtain service API map for this DASQuery" print msg print_exc(exc) return 1, error_msg(msg) if not service_map: return 1, error_msg("Unable to resolve the query over the " "available services: %s" % dasquery) return 0, dasquery @expose @checkargs(DAS_WEB_INPUTS) def index(self, *args, **kwargs): """ represents 
DAS web interface. It uses das_searchform template for input form and yui_table for output Table widget. """ uinput = getarg(kwargs, "input", "") return self.page(self.form(uinput=uinput, cards=True)) def form(self, uinput="", instance=None, view="list", cards=False): """ provide input DAS search form """ # TODO: rename into search_form()? (template is also called like this if "'" in uinput: # e.g. file.creation_date>'20120101 12:01:01' uinput = uinput.replace("'", '"') if not instance: instance = self.dbs_global cards = self.templatepage( "das_cards", base=self.base, show=cards, width=900, height=220, cards=help_cards(self.base) ) daskeys = self.templatepage("das_keys", daskeys=self.daskeyslist) page = self.templatepage( "das_searchform", input=uinput, init_dbses=list(self.dbs_instances), daskeys=daskeys, base=self.base, instance=instance, view=view, cards=cards, autocompl_host=json.dumps(self._get_autocompl_host()), ) return page @expose def error(self, msg, wrap=True): """ Show error message. """ page = self.templatepage("das_error", msg=str(msg)) if wrap: page = self.page(self.form() + page) return page @expose @checkargs(DAS_WEB_INPUTS) def gridfs(self, **kwargs): """ Retieve records from GridFS """ time0 = time.time() if "fid" not in kwargs: code = web_code("No file id") raise HTTPError(500, "DAS error, code=%s" % code) fid = kwargs.get("fid") data = {"status": "requested", "fid": fid} try: fds = self.gfs.get(ObjectId(fid)) return fds.read() except Exception as exc: print_exc(exc) code = web_code("Exception") raise HTTPError(500, "DAS error, code=%s" % code) data["ctime"] = time.time() - time0 return json.dumps(data) @expose @checkargs(DAS_WEB_INPUTS) def records(self, *args, **kwargs): """ Retieve all records id's. 
""" try: recordid = None if args: recordid = args[0] spec = {"_id": ObjectId(recordid)} fields = None query = dict(fields=fields, spec=spec) elif kwargs and "_id" in kwargs: spec = {"_id": ObjectId(kwargs["_id"])} fields = None query = dict(fields=fields, spec=spec) else: # return all ids query = dict(fields=None, spec={}) res = "" time0 = time.time() idx = getarg(kwargs, "idx", 0) limit = getarg(kwargs, "limit", 10) coll = kwargs.get("collection", "merge") view = kwargs.get("view", "") if view == "json": res = [] inst = kwargs.get("instance", self.dbs_global) form = self.form(uinput="") check, content = self.generate_dasquery(query, inst) if check: return self.page(form + content, ctime=time.time() - time0) dasquery = content # returned content is valid DAS query nresults = self.dasmgr.rawcache.nresults(dasquery, coll) gen = self.dasmgr.rawcache.get_from_cache(dasquery, idx=idx, limit=limit, collection=coll) if recordid: # we got id for row in gen: if view == "json": res.append(row) else: res += das_json(dasquery, row) else: for row in gen: rid = row["_id"] del row["_id"] res += self.templatepage("das_record", id=rid, collection=coll, daskeys=", ".join(row)) if recordid: page = res else: url = "/das/records?" 
if nresults: page = self.templatepage("das_pagination", nrows=nresults, idx=idx, limit=limit, url=url) else: page = "No results found, nresults=%s" % nresults page += res ctime = time.time() - time0 if view == "json": return json.dumps(res) page = self.page(form + page, ctime=ctime) return page except Exception as exc: print_exc(exc) return self.error(gen_error_msg(kwargs)) @jsonstreamer def datastream(self, kwargs): """Stream DAS data into JSON format""" head = kwargs.get("head", dict(timestamp=time.time())) if "mongo_query" not in head: head["mongo_query"] = head["dasquery"].mongo_query if "dasquery" in head else {} if "dasquery" in head: del head["dasquery"] if "args" in head: del head["args"] data = kwargs.get("data", []) if self.check_clients: # update client version cli, cli_msg = check_client_version() head.update({"client": cli, "client_message": cli_msg}) # for old clients setup appropriate status/reason if cli_msg: head.update({"status": "warning", "reason": cli_msg}) return head, data def get_data(self, kwargs): """ Invoke DAS workflow and get data from the cache. 
""" head = dict(timestamp=time.time()) head["args"] = kwargs uinput = kwargs.get("input", "") inst = kwargs.get("instance", self.dbs_global) idx = getarg(kwargs, "idx", 0) limit = getarg(kwargs, "limit", 0) # do not impose limit coll = kwargs.get("collection", "merge") status = kwargs.get("status") error = kwargs.get("error") reason = kwargs.get("reason") dasquery = kwargs.get("dasquery", None) time0 = time.time() if dasquery: dasquery = DASQuery(dasquery, instance=inst) else: check, content = self.generate_dasquery(uinput, inst, html_mode=False) if check: head.update({"status": "fail", "reason": content, "ctime": time.time() - time0, "input": uinput}) data = [] return head, data dasquery = content # returned content is valid DAS query try: nres = self.dasmgr.nresults(dasquery, coll) data = self.dasmgr.get_from_cache(dasquery, idx, limit) # check that we got what we expected data = [r for r in data] if nres and not len(data): for retry in xrange(1, 3, 5): msg = "retry in %s sec" % retry print dastimestamp("DAS WARNING "), msg, dasquery time.sleep(retry) # retry one more time data = self.dasmgr.get_from_cache(dasquery, idx, limit) data = [r for r in data] if len(data): break if nres and not len(data): msg = "fail to get all data for %s, nres=%s, len(data)=%s" % (dasquery, nres, len(data)) print dastimestamp("DAS WARNING "), msg status = "fail" reason = "Fail to retrieve data from DAS cache, please retry" if dasquery.aggregators: # aggregators split DAS record into sub-system and then # apply aggregator functions, therefore we need to correctly # account for nresults. Resolve generator into list and take # its length as nresults value. 
data = [r for r in data] nres = len(data) if error: # DAS record contains an error status = "error" head.update({"status": status, "nresults": nres, "ctime": time.time() - time0, "dasquery": dasquery}) except Exception as exc: status = "fail" reason = str(exc) print_exc(exc) head.update({"status": status, "ctime": time.time() - time0, "dasquery": dasquery}) data = [] head.update({"incache": self.dasmgr.incache(dasquery, coll="cache"), "apilist": self.dasmgr.apilist(dasquery)}) if reason: head.update({"reason": reason}) if status != "ok": head.update(self.info()) return head, data def info(self): "Return status of DAS server" info = {"nrequests": self.reqmgr.size(), "nworkers": self.taskmgr.nworkers(), "dasweb": self.reqmgr.status()} if self.dasmgr and self.dasmgr.taskmgr: info.update({"dascore": self.dasmgr.taskmgr.status()}) return dict(das_server=info) def busy(self): """ Check server load and report busy status if nrequests - nworkers > queue limit """ nrequests = self.reqmgr.size() if (nrequests - self.taskmgr.nworkers()) > self.queue_limit: msg = "#request=%s, queue_limit=%s, #workers=%s" % (nrequests, self.taskmgr.nworkers(), self.queue_limit) print dastimestamp("DAS WEB SERVER IS BUSY "), msg return True return False def busy_page(self, uinput=None): """DAS server busy page layout""" page = "<h3>DAS server is busy, please try later</h3>" form = self.form(uinput) return self.page(form + page) def _is_web_request(self, view): """ returns whether the current view mode is not web """ # first, check for explicit output type (view) if view in ["json", "xml", "plain"]: return False # check accept header - e.g. 
das client only provides accept header accepts = cherrypy.request.headers.elements("Accept") non_html_accepts = ["application/json"] other_accepted = [a for a in accepts if a.value not in non_html_accepts] # if only non html content types are accepted we are in non html mode if not other_accepted and accepts: return False return True @expose @checkargs(DAS_WEB_INPUTS) def cache(self, **kwargs): """ DAS web cache interface. Fire up new process for new requests and record its pid. The client is in charge to keep track of pid. The new process uses DAS core call to request the data into cache. Since query are cached the repeated call with the same query has no cost to DAS core. """ # do not allow caching set_no_cache_flags() # if busy return right away if self.busy(): nrequests = self.reqmgr.size() level = nrequests - self.taskmgr.nworkers() - self.queue_limit reason = "DAS server is busy" reason += ", #requests=%s, #workers=%s, queue size=%s" % ( self.reqmgr.size(), self.taskmgr.nworkds(), self.queue_limit, ) head = dict(timestamp=time.time()) head.update({"status": "busy", "reason": reason, "ctime": 0}) data = [] return self.datastream(dict(head=head, data=data)) uinput = kwargs.get("input", "").strip() if not uinput: head = {"status": "fail", "reason": "No input found", "args": kwargs, "ctime": 0, "input": uinput} data = [] return self.datastream(dict(head=head, data=data)) self.adjust_input(kwargs) pid = kwargs.get("pid", "") inst = kwargs.get("instance", self.dbs_global) uinput = kwargs.get("input", "") view = kwargs.get("view", "list") data = [] # textual views need text only error messages... 
check, content = self.generate_dasquery(uinput, inst, html_mode=self._is_web_request(view)) if check: head = dict(timestamp=time.time()) head.update( {"status": "fail", "reason": "Can not interpret the query" + " (while creating DASQuery)", "ctime": 0} ) if not self._is_web_request(view): head["error_details"] = content head["reason"] = head["reason"] + "\n\n" + content return self.datastream(dict(head=head, data=data)) dasquery = content # returned content is valid DAS query status, error, reason = self.dasmgr.get_status(dasquery) kwargs.update({"status": status, "error": error, "reason": reason}) if not pid: pid = dasquery.qhash if status == None and not self.reqmgr.has_pid(pid): # submit new request addr = cherrypy.request.headers.get("Remote-Addr") _evt, pid = self.taskmgr.spawn(self.dasmgr.call, dasquery, uid=addr, pid=dasquery.qhash) self.reqmgr.add(pid, kwargs) return pid if status == "ok": self.reqmgr.remove(pid) kwargs["dasquery"] = dasquery head, data = self.get_data(kwargs) return self.datastream(dict(head=head, data=data)) kwargs["dasquery"] = dasquery.storage_query if not self.pid_pat.match(str(pid)) or len(str(pid)) != 32: self.reqmgr.remove(pid) head = {"status": "fail", "reason": "Invalid pid", "args": kwargs, "ctime": 0, "input": uinput} data = [] return self.datastream(dict(head=head, data=data)) elif self.taskmgr.is_alive(pid): return pid else: # process is done, get data self.reqmgr.remove(pid) head, data = self.get_data(kwargs) return self.datastream(dict(head=head, data=data)) def get_page_content(self, kwargs, complete_msg=True): """Retrieve page content for provided set of parameters""" page = "" try: view = kwargs.get("view", "list") if view == "plain": if "limit" in kwargs: del kwargs["limit"] if view in ["json", "xml", "plain"] and complete_msg: page = "Request completed. Reload the page ..." 
else: head, data = self.get_data(kwargs) allowed_views = ["list", "table", "plain", "xml", "json"] if view not in allowed_views: raise func = getattr(self, view + "view") page = func(head, data) except HTTPError as _err: raise except Exception as exc: print_exc(exc) msg = gen_error_msg(kwargs) page = self.templatepage("das_error", msg=msg) return page @expose def download(self, lfn): "DAS download page for given LFN" page = self.templatepage("filemover", lfn=lfn) return self.page(page, response_div=False) @expose def makepy(self, dataset, instance): """ Request to create CMSSW py snippet for a given dataset """ pat = re.compile("/.*/.*/.*") if not pat.match(dataset): msg = "Invalid dataset name" return self.error(msg) query = "file dataset=%s instance=%s | grep file.name" % (dataset, instance) try: data = self.dasmgr.result(query, idx=0, limit=0) except Exception as exc: print_exc(exc) msg = "Exception: %s\n" % str(exc) msg += "Unable to retrieve data for query=%s" % query return self.error(msg) lfns = [] for rec in data: filename = DotDict(rec).get("file.name") if filename not in lfns: lfns.append(filename) page = self.templatepage("das_files_py", lfnList=lfns, pfnList=[]) cherrypy.response.headers["Content-Type"] = "text/plain" return page @expose @checkargs(DAS_WEB_INPUTS) def request(self, **kwargs): """ Request data from DAS cache. 
""" # do not allow caching set_no_cache_flags() uinput = kwargs.get("input", "").strip() if not uinput: kwargs["reason"] = "No input found" return self.redirect(**kwargs) # if busy return right away if self.busy(): return self.busy_page(uinput) time0 = time.time() self.adjust_input(kwargs) view = kwargs.get("view", "list") inst = kwargs.get("instance", self.dbs_global) uinput = kwargs.get("input", "") form = self.form(uinput=uinput, instance=inst, view=view) check, content = self.generate_dasquery(uinput, inst) if check: if view == "list" or view == "table": return self.page(form + content, ctime=time.time() - time0) else: return content dasquery = content # returned content is valid DAS query status, error, reason = self.dasmgr.get_status(dasquery) kwargs.update({"status": status, "error": error, "reason": reason}) pid = dasquery.qhash if status == None: # process new request kwargs["dasquery"] = dasquery.storage_query addr = cherrypy.request.headers.get("Remote-Addr") _evt, pid = self.taskmgr.spawn(self.dasmgr.call, dasquery, uid=addr, pid=dasquery.qhash) self.reqmgr.add(pid, kwargs) elif status == "ok" or status == "fail": self.reqmgr.remove(pid) # check if query can be rewritten via nested PK query rew_msg = self.q_rewriter and self.q_rewriter.check_fields(dasquery) if rew_msg: content = self.templatepage("das_error", msg=rew_msg) return self.page(form + content, ctime=time.time() - time0) kwargs["dasquery"] = dasquery page = self.get_page_content(kwargs, complete_msg=False) ctime = time.time() - time0 if view == "list" or view == "table": return self.page(form + page, ctime=ctime) return page if self.taskmgr.is_alive(pid): page = self.templatepage( "das_check_pid", method="check_pid", uinput=uinput, view=view, base=self.base, pid=pid, interval=self.interval, ) else: self.reqmgr.remove(pid) page = self.get_page_content(kwargs) ctime = time.time() - time0 return self.page(form + page, ctime=ctime) @expose def status(self): """Return list of all current requests 
in DAS queue""" requests = [r for r in self.reqmgr.items()] page = self.templatepage("das_status", requests=requests) return self.page(page) @expose @checkargs(["pid"]) def check_pid(self, pid): """ Check status of given pid. This is a server callback function for ajaxCheckPid, see js/ajax_utils.js """ # do not allow caching set_no_cache_flags() img = '<img src="%s/images/loading.gif" alt="loading"/>' % self.base page = "" try: if self.taskmgr.is_alive(pid): page = img + " processing PID=%s" % pid else: # at this point we don't know if request arrived to this host # or it was processed. To distinguish the case we'll ask # request manager for that pid if self.reqmgr.has_pid(pid): self.reqmgr.remove(pid) page = "Request PID=%s is completed" % pid page += ", please wait for results to load" else: # there're no request on this server, re-initiate it ref = cherrypy.request.headers.get("Referer", None) if ref: url = urlparse(ref) params = dict(parse_qsl(url.query)) return self.request(**params) else: msg = "No referer in cherrypy.request.headers" msg += "\nHeaders: %s" % cherrypy.request.headers print dastimestamp("DAS WEB ERROR "), msg except Exception as err: msg = "check_pid fails for pid=%s" % pid print dastimestamp("DAS WEB ERROR "), msg print_exc(err) self.reqmgr.remove(pid) self.taskmgr.remove(pid) return self.error(gen_error_msg({"pid": pid}), wrap=False) return page def listview(self, head, data): """DAS listview data representation""" return self.repmgr.listview(head, data) def tableview(self, head, data): """DAS tabular view data representation""" return self.repmgr.tableview(head, data) def plainview(self, head, data): """DAS plain view data representation""" return self.repmgr.plainview(head, data) def xmlview(self, head, data): """DAS XML data representation""" return self.repmgr.xmlview(head, data) def jsonview(self, head, data): """DAS JSON data representation""" return self.repmgr.jsonview(head, data) @exposedasjson @enable_cross_origin 
@checkargs(["query", "dbs_instance"]) def autocomplete(self, **kwargs): """ Provides autocomplete functionality for DAS web UI. """ query = kwargs.get("query", "").strip() result = autocomplete_helper(query, self.dasmgr, self.daskeys) dataset = [r for r in result if r["value"].find("dataset=") != -1] dbsinst = kwargs.get("dbs_instance", self.dbs_global) if self.dataset_daemon and len(dataset): dbsmgr = self._get_dbsmgr(dbsinst) if query.find("dataset=") != -1: query = query.replace("dataset=", "") for row in dbsmgr.find(query): result.append({"css": "ac-info", "value": "dataset=%s" % row, "info": "dataset"}) return result
def setUp(self):
    """Create a fresh RequestManager whose entries expire immediately."""
    self.reqmgr = RequestManager(lifetime=0)
class testDAS_RegMgr(unittest.TestCase):
    """Unit tests for the DAS RequestManager."""

    def setUp(self):
        """Build a RequestManager whose records expire immediately."""
        self.reqmgr = RequestManager(lifetime=0)

    def test_reqmgr(self):
        """Add/get/remove round-trip through the request manager."""
        pid, kwds = 1, {'uinput': 'bla'}
        self.reqmgr.add(pid, kwds)
        self.assertEqual(kwds, self.reqmgr.get(pid))
        self.reqmgr.remove(pid)
        self.assertEqual(None, self.reqmgr.get(pid))

    def test_reqmgr_tstamp(self):
        """clean() drops entries whose lifetime has elapsed."""
        pid, kwds = 1, {'input': 'bla'}
        self.reqmgr.add(pid, kwds)
        self.assertEqual(kwds, self.reqmgr.get(pid))
        time.sleep(1)
        self.reqmgr.clean()
        self.assertEqual([], list(self.reqmgr.items()))
class testDAS_RegMgr(unittest.TestCase):
    """Test suite covering the DAS RequestManager behavior."""

    def setUp(self):
        """Instantiate a RequestManager with zero lifetime for each test."""
        self.reqmgr = RequestManager(lifetime=0)

    def test_reqmgr(self):
        """Stored kwargs come back via get(); remove() erases them."""
        pid = 1
        payload = {'uinput': 'bla'}
        self.reqmgr.add(pid, payload)
        fetched = self.reqmgr.get(pid)
        self.assertEqual(payload, fetched)
        self.reqmgr.remove(pid)
        fetched = self.reqmgr.get(pid)
        self.assertEqual(None, fetched)

    def test_reqmgr_tstamp(self):
        """After the lifetime passes, clean() leaves no items behind."""
        pid = 1
        payload = {'input': 'bla'}
        self.reqmgr.add(pid, payload)
        fetched = self.reqmgr.get(pid)
        self.assertEqual(payload, fetched)
        time.sleep(1)
        self.reqmgr.clean()
        remaining = [entry for entry in self.reqmgr.items()]
        self.assertEqual([], remaining)
class DASWebService(DASWebManager):
    """
    DAS web service interface (CherryPy application).

    Exposes the DAS search UI and cache API, dispatches user queries to
    DAS core via a task manager, and records requests in a logging DB.
    NOTE(review): Python 2 code (print statements, has_key, thread module).
    """
    def __init__(self, dasconfig):
        DASWebManager.__init__(self, dasconfig)
        config = dasconfig['web_server']
        # pids are 32-char hex hashes (md5-style); used to validate client pids
        self.pid_pat = re.compile(r'^[a-z0-9]{32}')
        self.base = config['url_base']
        self.interval = config.get('status_update', 2500)
        self.engine = config.get('engine', None)
        nworkers = config['number_of_workers']
        self.hot_thr = config.get('hot_threshold', 3000)
        self.dasconfig = dasconfig
        self.dburi = self.dasconfig['mongodb']['dburi']
        self.lifetime = self.dasconfig['mongodb']['lifetime']
        self.queue_limit = config.get('queue_limit', 50)
        # choose plugin-based or plain task manager depending on engine
        if self.engine:
            thr_name = 'DASWebService:PluginTaskManager'
            self.taskmgr = PluginTaskManager(\
                bus=self.engine, nworkers=nworkers, name=thr_name)
            self.taskmgr.subscribe()
        else:
            thr_name = 'DASWebService:TaskManager'
            self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
        self.adjust = config.get('adjust_input', False)
        self.init()
        # Monitoring thread which performs auto-reconnection
        thread.start_new_thread(dascore_monitor, \
            ({'das':self.dasmgr, 'uri':self.dburi}, self.init, 5))
        # Obtain DBS global instance or set it as None
        if self.dasconfig.has_key('dbs'):
            self.dbs_global = \
                self.dasconfig['dbs'].get('dbs_global_instance', None)
            self.dbs_instances = \
                self.dasconfig['dbs'].get('dbs_instances', [])
        else:
            self.dbs_global = None
            self.dbs_instances = []
        # Start DBS daemon
        self.dataset_daemon = config.get('dbs_daemon', False)
        if self.dataset_daemon:
            self.dbs_daemon(config)

    def process_requests_onhold(self):
        "Process requests which are on hold"
        try:
            # use half the queue limit as the onhold-processing budget
            limit = self.queue_limit/2
            thread.start_new_thread(onhold_worker, \
                (self.dasmgr, self.taskmgr, self.reqmgr, limit))
        except Exception as exc:
            print_exc(exc)

    def dbs_daemon(self, config):
        """Start DBS daemon if it is requested via DAS configuration"""
        try:
            main_dbs_url = self.dasconfig['dbs']['dbs_global_url']
            self.dbs_urls = []
            # derive per-instance DBS urls from the global instance url
            for inst in self.dbs_instances:
                self.dbs_urls.append(\
                    main_dbs_url.replace(self.dbs_global, inst))
            interval = config.get('dbs_daemon_interval', 3600)
            dbsexpire = config.get('dbs_daemon_expire', 3600)
            self.dbsmgr = {} # dbs_urls vs dbs_daemons
            if self.dataset_daemon:
                for dbs_url in self.dbs_urls:
                    dbsmgr = DBSDaemon(dbs_url, self.dburi, expire=dbsexpire)
                    self.dbsmgr[dbs_url] = dbsmgr
                    def dbs_updater(_dbsmgr, interval):
                        """DBS updater daemon"""
                        while True:
                            try:
                                _dbsmgr.update()
                            # NOTE(review): bare except silences all errors,
                            # including KeyboardInterrupt, in this thread
                            except:
                                pass
                            time.sleep(interval)
                    print "Start DBSDaemon for %s" % dbs_url
                    thread.start_new_thread(dbs_updater, (dbsmgr, interval, ))
        except Exception as exc:
            print_exc(exc)

    def init(self):
        """Init DAS web server, connect to DAS Core"""
        try:
            self.logcol = DASLogdb(self.dasconfig)
            self.reqmgr = RequestManager(self.dburi, lifetime=self.lifetime)
            self.dasmgr = DASCore(engine=self.engine)
            self.repmgr = CMSRepresentation(self.dasconfig, self.dasmgr)
            self.daskeys = self.dasmgr.das_keys()
            self.gfs = db_gridfs(self.dburi)
            self.daskeys.sort()
            self.dasmapping = self.dasmgr.mapping
            self.dasmapping.init_presentationcache()
            self.colors = {}
            for system in self.dasmgr.systems:
                self.colors[system] = gen_color(system)
            self.sitedbmgr = SiteDBService(self.dasconfig)
        except Exception as exc:
            # on failure, leave the service in a degraded but consistent
            # state; dascore_monitor will retry init() periodically
            print_exc(exc)
            self.dasmgr = None
            self.daskeys = []
            self.colors = {}
            return
        # Start Onhold_request daemon
        if self.dasconfig['web_server'].get('onhold_daemon', False):
            self.process_requests_onhold()

    def logdb(self, query):
        """
        Make entry in Logging DB for the current CherryPy request.
        """
        qhash = genkey(query)
        args = cherrypy.request.params
        # date encoded as integer YYYYMMDD
        doc = dict(qhash=qhash,
                   date=int(str(date.fromtimestamp(time.time())).replace('-', '')),
                   headers=cherrypy.request.headers,
                   method=cherrypy.request.method,
                   path=cherrypy.request.path_info,
                   args=args, ahash=genkey(args),
                   ip=cherrypy.request.remote.ip,
                   hostname=cherrypy.request.remote.name,
                   port=cherrypy.request.remote.port)
        self.logcol.insert('web', doc)

    def get_nhits(self):
        "Return number of hits per day client made"
        # midnight of today, in epoch seconds
        tsec = time.mktime(date.timetuple(date.today()))
        spec = {'ip': cherrypy.request.remote.ip,
                'ts': {'$gte': tsec},
                'args.pid': {'$exists': False}, # do not count pid requests
                'path': '/cache'} # requests from das_client calls
        nhits = self.logcol.find(spec, count=True)
        return nhits

    @expose
    @checkargs(DAS_WEB_INPUTS)
    def redirect(self, **kwargs):
        """
        Represent DAS redirect page
        """
        dmsg = 'You do not have permission to access the resource requested.'
        msg = kwargs.get('reason', dmsg)
        if msg:
            msg = 'Reason: ' + msg
        page = self.templatepage('das_redirect', msg=msg)
        return self.page(page, response_div=False)

    def bottom(self, response_div=True):
        """
        Define footer for all DAS web pages
        """
        return self.templatepage('das_bottom', div=response_div,
                                 version=DAS.version)

    def page(self, content, ctime=None, response_div=True):
        """
        Assemble a full DAS web page: header, given content, and footer.
        """
        page = self.top()
        page += content
        page += self.templatepage('das_bottom', ctime=ctime,
                                  version=DAS.version, div=response_div)
        return page

    @expose
    @checkargs(DAS_WEB_INPUTS + ['section', 'highlight'])
    def faq(self, *args, **kwargs):
        """
        Represent DAS FAQ page.
        """
        section = kwargs.get('section', None)
        highlight = kwargs.get('highlight', None)
        guide = self.templatepage('dbsql_vs_dasql',
                                  operators=', '.join(das_operators()))
        page = self.templatepage('das_faq', guide=guide,
                                 section=section, highlight=highlight,
                                 operators=', '.join(das_operators()),
                                 aggregators=', '.join(das_aggregators()))
        return self.page(page, response_div=False)

    @expose
    def cli(self):
        """
        Serve DAS CLI file download.
        """
        # project root is three directories above this module
        dasroot = '/'.join(__file__.split('/')[:-3])
        clifile = os.path.join(dasroot, 'DAS/tools/das_client.py')
        return serve_file(clifile, content_type='text/plain')

    @expose
    def movetodas(self):
        "Placeholder page for DBS to DAS migration"
        style = "width:600px;margin-left:auto;margin-right:auto;padding-top:20px"
        page = """<div style="%s">""" % style
        page += "Dear user,<br/>DBS Data Discovery page is depricated.<br/>"
        page += "Please migrate to Data Aggregation Service located at"
        page += "<p>https://cmsweb.cern.ch/das/</p>"
        page += "<em>CMS HTTP group.</em>"
        # NOTE(review): the trailing "" is a no-op implicit string concat
        page += "</div>"""
        return page

    @expose
    def opensearch(self):
        """
        Serve DAS opensearch file.
        """
        if self.base and self.base.find('http://') != -1:
            base = self.base
        else:
            base = 'http://cmsweb.cern.ch/das'
        desc = self.templatepage('das_opensearch', base=base)
        cherrypy.response.headers['Content-Type'] = \
            'application/opensearchdescription+xml'
        return desc

    @expose
    @checkargs(DAS_WEB_INPUTS)
    def services(self, *args, **kwargs):
        """
        Represent DAS services page: per-system keys and APIs.
        """
        dasdict = {}
        daskeys = []
        for system, keys in self.dasmgr.mapping.daskeys().iteritems():
            if system not in self.dasmgr.systems:
                continue
            tmpdict = {}
            for key in keys:
                tmpdict[key] = self.dasmgr.mapping.lookup_keys(system, key)
                if key not in daskeys:
                    daskeys.append(key)
            dasdict[system] = dict(keys=dict(tmpdict),
                                   apis=self.dasmgr.mapping.list_apis(system))
        mapreduce = [r for r in self.dasmgr.rawcache.get_map_reduce()]
        page = self.templatepage('das_services', dasdict=dasdict,
                                 daskeys=daskeys, mapreduce=mapreduce)
        return self.page(page, response_div=False)

    @expose
    @checkargs(DAS_WEB_INPUTS)
    def api(self, name, **kwargs):
        """
        Return DAS mapping record about provided API.
        """
        record = self.dasmgr.mapping.api_info(name)
        page = "<b>DAS mapping record</b>"
        page += das_json(record)
        return self.page(page, response_div=False)

    @expose
    @checkargs(DAS_WEB_INPUTS)
    def default(self, *args, **kwargs):
        """
        Default method: fall through to the index page.
        """
        return self.index(args, kwargs)

    def adjust_input(self, kwargs):
        """
        Adjust user input wrt common DAS keyword patterns, e.g.
        Zee -> dataset=*Zee*, T1_US -> site=T1_US*. This method
        only works if self.adjust is set in configuration of DAS server.
        This method can be customization for concrete DAS applications via
        external free_text_parser function (part of DAS.web.utils module)
        """
        if not self.adjust:
            return
        uinput = kwargs.get('input', '')
        # do not rewrite special 'queries'/'records' pipelines
        query_part = uinput.split('|')[0]
        if query_part == 'queries' or query_part == 'records':
            return
        new_input = free_text_parser(uinput, self.daskeys)
        if uinput and new_input == uinput:
            # parser changed nothing: try to prepend a default select key
            selkey = choose_select_key(uinput, self.daskeys, 'dataset')
            if selkey and len(new_input) > len(selkey) and \
                new_input[:len(selkey)] != selkey:
                new_input = selkey + ' ' + new_input
        kwargs['input'] = new_input

    def generate_dasquery(self, uinput, inst, html_error=True):
        """
        Check provided input as valid DAS input query.
        Returns status and content (either error message or valid DASQuery)
        """
        def helper(msg, html_error=None):
            """Helper function which provide error template"""
            if not html_error:
                return msg
            guide = self.templatepage('dbsql_vs_dasql',
                                      operators=', '.join(das_operators()))
            page = self.templatepage('das_ambiguous', msg=msg,
                                     base=self.base, guide=guide)
            return page
        if not uinput:
            return 1, helper('No input query')
        # Generate DASQuery object, if it fails we catch the exception and
        # wrap it for upper layer (web interface)
        try:
            dasquery = DASQuery(uinput, instance=inst)
        except Exception as err:
            return 1, helper(das_parser_error(uinput, str(err)), html_error)
        fields = dasquery.mongo_query.get('fields', [])
        if not fields:
            fields = []
        spec = dasquery.mongo_query.get('spec', {})
        # every selected field / spec key must match a known DAS key
        for word in fields+spec.keys():
            found = 0
            if word in DAS_DB_KEYWORDS:
                found = 1
            for key in self.daskeys:
                if word.find(key) != -1:
                    found = 1
            if not found:
                msg = 'Provided input does not contain a valid DAS key'
                return 1, helper(msg, html_error)
        if isinstance(uinput, dict): # DASQuery w/ {'spec':{'_id:id}}
            pass
        elif uinput.find('queries') != -1:
            pass
        elif uinput.find('records') != -1:
            pass
        else: # normal user DAS query
            try:
                service_map = dasquery.service_apis_map()
            except Exception as exc:
                msg = 'Fail to lookup DASQuery service API map'
                print msg
                print_exc(exc)
                return 1, helper(msg, html_error)
            if not service_map:
                msg = "None of the API's registered in DAS "
                msg += "can resolve this query"
                return 1, helper(msg, html_error)
        return 0, dasquery

    @expose
    @checkargs(DAS_WEB_INPUTS)
    def index(self, *args, **kwargs):
        """
        represents DAS web interface.
        It uses das_searchform template for input form and yui_table for
        output Table widget.
        """
        uinput = getarg(kwargs, 'input', '')
        return self.page(self.form(uinput=uinput, cards=True))

    def form(self, uinput='', instance=None, view='list', cards=False):
        """
        provide input DAS search form
        """
        if not instance:
            instance = self.dbs_global
        cards = self.templatepage('das_cards', base=self.base, show=cards, \
            width=900, height=220, cards=help_cards(self.base))
        page = self.templatepage('das_searchform', input=uinput, \
            init_dbses=list(self.dbs_instances), \
            base=self.base, instance=instance, view=view, cards=cards)
        return page

    @expose
    def error(self, msg, wrap=True):
        """
        Show error message.
        """
        page = self.templatepage('das_error', msg=str(msg))
        if wrap:
            page = self.page(self.form() + page)
        return page

    @expose
    @checkargs(DAS_WEB_INPUTS)
    def gridfs(self, *args, **kwargs):
        """
        Retrieve records from GridFS
        """
        time0 = time.time()
        if not kwargs.has_key('fid'):
            code = web_code('No file id')
            raise HTTPError(500, 'DAS error, code=%s' % code)
        fid = kwargs.get('fid')
        data = {'status':'requested', 'fid':fid}
        try:
            fds = self.gfs.get(ObjectId(fid))
            return fds.read()
        except Exception as exc:
            print_exc(exc)
            code = web_code('Exception')
            raise HTTPError(500, 'DAS error, code=%s' % code)
        # NOTE(review): unreachable — both branches above return or raise
        data['ctime'] = time.time() - time0
        return json.dumps(data)

    @expose
    @checkargs(DAS_WEB_INPUTS)
    def records(self, *args, **kwargs):
        """
        Retrieve all records id's.
        """
        try:
            recordid = None
            if args:
                recordid = args[0]
                spec = {'_id':ObjectId(recordid)}
                fields = None
                query = dict(fields=fields, spec=spec)
            elif kwargs and kwargs.has_key('_id'):
                spec = {'_id': ObjectId(kwargs['_id'])}
                fields = None
                query = dict(fields=fields, spec=spec)
            else: # return all ids
                query = dict(fields=None, spec={})
            res = ''
            time0 = time.time()
            idx = getarg(kwargs, 'idx', 0)
            limit = getarg(kwargs, 'limit', 10)
            coll = kwargs.get('collection', 'merge')
            inst = kwargs.get('instance', self.dbs_global)
            form = self.form(uinput="")
            check, content = self.generate_dasquery(query, inst)
            if check:
                return self.page(form + content, ctime=time.time()-time0)
            dasquery = content # returned content is valid DAS query
            nresults = self.dasmgr.rawcache.nresults(dasquery, coll)
            gen = self.dasmgr.rawcache.get_from_cache\
                (dasquery, idx=idx, limit=limit, collection=coll)
            if recordid: # we got id
                for row in gen:
                    res += das_json(row)
            else:
                for row in gen:
                    rid = row['_id']
                    del row['_id']
                    res += self.templatepage('das_record', \
                        id=rid, collection=coll, daskeys=', '.join(row))
            if recordid:
                page = res
            else:
                url = '/das/records?'
                if nresults:
                    page = self.templatepage('das_pagination', \
                        nrows=nresults, idx=idx, limit=limit, url=url)
                else:
                    page = 'No results found, nresults=%s' % nresults
                page += res
            ctime = (time.time()-time0)
            page = self.page(form + page, ctime=ctime)
            return page
        except Exception as exc:
            print_exc(exc)
            return self.error(gen_error_msg(kwargs))

    @jsonstreamer
    def datastream(self, kwargs):
        """Stream DAS data into JSON format"""
        head = kwargs.get('head', dict(timestamp=time.time()))
        # normalize head: expose mongo_query, drop internal-only keys
        if not head.has_key('mongo_query'):
            head['mongo_query'] = head['dasquery'].mongo_query \
                if head.has_key('dasquery') else {}
        if head.has_key('dasquery'):
            del head['dasquery']
        if head.has_key('args'):
            del head['args']
        data = kwargs.get('data', [])
        return head, data

    def get_data(self, kwargs):
        """
        Invoke DAS workflow and get data from the cache.
        Returns (head, data) where head carries status/metadata.
        """
        head = dict(timestamp=time.time())
        head['args'] = kwargs
        uinput = kwargs.get('input', '')
        inst = kwargs.get('instance', self.dbs_global)
        idx = getarg(kwargs, 'idx', 0)
        limit = getarg(kwargs, 'limit', 0) # do not impose limit
        coll = kwargs.get('collection', 'merge')
        dasquery = kwargs.get('dasquery', None)
        time0 = time.time()
        if dasquery:
            dasquery = DASQuery(dasquery, instance=inst)
        else:
            check, content = \
                self.generate_dasquery(uinput, inst, html_error=False)
            if check:
                head.update({'status': 'fail', 'reason': content,
                             'ctime': time.time()-time0, 'input': uinput})
                data = []
                return head, data
            dasquery = content # returned content is valid DAS query
        try:
            nres = self.dasmgr.nresults(dasquery, coll)
            data = \
                self.dasmgr.get_from_cache(dasquery, idx, limit)
            head.update({'status':'ok', 'nresults':nres,
                         'ctime': time.time()-time0, 'dasquery': dasquery})
        except Exception as exc:
            print_exc(exc)
            head.update({'status': 'fail', 'reason': str(exc),
                         'ctime': time.time()-time0, 'dasquery': dasquery})
            data = []
        head.update({'incache':self.dasmgr.incache(dasquery, coll='cache')})
        return head, data

    def busy(self):
        """
        Check server load and report busy status if it's
        above threshold = queue size - nworkers
        """
        nrequests = self.reqmgr.size()
        if (nrequests - self.taskmgr.nworkers()) > self.queue_limit:
            return True
        return False

    def busy_page(self, uinput=None):
        """DAS server busy page layout"""
        page = "<h3>DAS server is busy, please try later</h3>"
        form = self.form(uinput)
        return self.page(form + page)

    @expose
    @checkargs(DAS_WEB_INPUTS)
    def cache(self, **kwargs):
        """
        DAS web cache interface. Fire up new process for new requests
        and record its pid. The client is in charge to keep track of pid.
        The new process uses DAS core call to request the data into cache.
        Since query are cached the repeated call with the same query
        has no cost to DAS core.
        """
        # remove expires records from merge collection
        self.dasmgr.rawcache.remove_expired('merge')
        # do not allow caching
        cherrypy.response.headers['Cache-Control'] = 'no-cache'
        cherrypy.response.headers['Pragma'] = 'no-cache'
        uinput = kwargs.get('input', '').strip()
        if not uinput:
            head = {'status': 'fail', 'reason': 'No input found',
                    'args': kwargs, 'ctime': 0, 'input': uinput}
            data = []
            return self.datastream(dict(head=head, data=data))
        self.adjust_input(kwargs)
        pid = kwargs.get('pid', '')
        inst = kwargs.get('instance', self.dbs_global)
        uinput = kwargs.get('input', '')
        data = []
        check, content = self.generate_dasquery(uinput, inst)
        if check:
            head = dict(timestamp=time.time())
            head.update({'status': 'fail',
                         'reason': 'Fail to create DASQuery object',
                         'ctime': 0})
            return self.datastream(dict(head=head, data=data))
        dasquery = content # returned content is valid DAS query
        status, qhash = self.dasmgr.get_status(dasquery)
        if status == 'ok':
            # data already in cache: serve it and drop the request record
            self.reqmgr.remove(dasquery.qhash)
            head, data = self.get_data(kwargs)
            return self.datastream(dict(head=head, data=data))
        kwargs['dasquery'] = dasquery.storage_query
        if not pid and self.busy():
            head = dict(timestamp=time.time())
            head.update({'status': 'busy',
                         'reason': 'DAS server is busy',
                         'ctime': 0})
            return self.datastream(dict(head=head, data=data))
        if pid:
            # client polls an existing request
            if not self.pid_pat.match(str(pid)) or len(str(pid)) != 32:
                head = {'status': 'fail', 'reason': 'Invalid pid',
                        'args': kwargs, 'ctime': 0, 'input': uinput}
                data = []
                return self.datastream(dict(head=head, data=data))
            elif self.taskmgr.is_alive(pid):
                return pid
            else: # process is done, get data
                self.reqmgr.remove(pid)
                head, data = self.get_data(kwargs)
                return self.datastream(dict(head=head, data=data))
        else:
            # brand new request; throttle hot clients via hit counting
            config = self.dasconfig.get('cacherequests', {})
            thr = threshold(self.sitedbmgr, self.hot_thr, config)
            nhits = self.get_nhits()
            if nhits > thr: # exceed threshold
                if self.busy(): # put request onhold, server is busy
                    # delay grows with how far the client is over threshold
                    tstamp = time.time() + 60*(nhits/thr) + (nhits%thr)
                    pid = dasquery.qhash
                    self.reqmgr.add_onhold(\
                        pid, uinput, cherrypy.request.remote.ip, tstamp)
                    head = {'status':'onhold',
                            'mongo_query':dasquery.mongo_query,
                            'pid':pid, 'nresults':0, 'ctime':0,
                            'timestamp':time.time()}
                    data = []
                    return self.datastream(dict(head=head, data=data))
            addr = cherrypy.request.headers.get('Remote-Addr')
            _evt, pid = self.taskmgr.spawn(\
                self.dasmgr.call, dasquery, addr, pid=dasquery.qhash)
            self.logdb(uinput) # put entry in log DB once we place a request
            self.reqmgr.add(pid, kwargs)
            return pid

    def get_page_content(self, kwargs, complete_msg=True):
        """Retrieve page content for provided set of parameters"""
        page = ''
        try:
            view = kwargs.get('view', 'list')
            if view == 'plain':
                if kwargs.has_key('limit'):
                    del kwargs['limit']
            if view in ['json', 'xml', 'plain'] and complete_msg:
                page = 'Request comlpeted. Reload the page ...'
            else:
                head, data = self.get_data(kwargs)
                # dispatch to listview/tableview/... by view name
                func = getattr(self, view + "view")
                page = func(head, data)
        except HTTPError as _err:
            raise
        except Exception as exc:
            print_exc(exc)
            msg = gen_error_msg(kwargs)
            page = self.templatepage('das_error', msg=msg)
        return page

    @expose
    def makepy(self, dataset, instance):
        """
        Request to create CMSSW py snippet for a given dataset
        """
        pat = re.compile('/.*/.*/.*')
        if not pat.match(dataset):
            msg = 'Invalid dataset name'
            return self.error(msg)
        query = "file dataset=%s instance=%s | grep file.name" \
            % (dataset, instance)
        try:
            data = self.dasmgr.result(query, idx=0, limit=0)
        except Exception as exc:
            print_exc(exc)
            msg = 'Exception: %s\n' % str(exc)
            msg += 'Unable to retrieve data for query=%s' % query
            return self.error(msg)
        lfns = []
        for rec in data:
            filename = DotDict(rec).get('file.name')
            if filename not in lfns:
                lfns.append(filename)
        page = self.templatepage('das_files_py', lfnList=lfns, pfnList=[])
        cherrypy.response.headers['Content-Type'] = "text/plain"
        return page

    @expose
    @checkargs(DAS_WEB_INPUTS)
    def request(self, **kwargs):
        """
        Request data from DAS cache (main web entry point for queries).
        """
        # remove expires records from merge collection
        self.dasmgr.rawcache.remove_expired('merge')
        # do not allow caching
        cherrypy.response.headers['Cache-Control'] = 'no-cache'
        cherrypy.response.headers['Pragma'] = 'no-cache'
        uinput = kwargs.get('input', '').strip()
        if not uinput:
            kwargs['reason'] = 'No input found'
            return self.redirect(**kwargs)
        time0 = time.time()
        self.adjust_input(kwargs)
        view = kwargs.get('view', 'list')
        inst = kwargs.get('instance', self.dbs_global)
        uinput = kwargs.get('input', '')
        if self.busy():
            return self.busy_page(uinput)
        # ahash identifies this request's args in the logging DB
        ahash = genkey(cherrypy.request.params)
        self.logdb(uinput)
        form = self.form(uinput=uinput, instance=inst, view=view)
        check, content = self.generate_dasquery(uinput, inst)
        if check:
            if view == 'list' or view == 'table':
                return self.page(form + content, ctime=time.time()-time0)
            else:
                return content
        dasquery = content # returned content is valid DAS query
        status, qhash = self.dasmgr.get_status(dasquery)
        if status == 'ok':
            page = self.get_page_content(kwargs, complete_msg=False)
            ctime = (time.time()-time0)
            if view == 'list' or view == 'table':
                return self.page(form + page, ctime=ctime)
            return page
        else:
            # spawn DAS core call; client polls via check_pid
            kwargs['dasquery'] = dasquery.storage_query
            addr = cherrypy.request.headers.get('Remote-Addr')
            _evt, pid = self.taskmgr.spawn(self.dasmgr.call, dasquery,
                                addr, pid=dasquery.qhash)
            self.reqmgr.add(pid, kwargs)
            if self.taskmgr.is_alive(pid):
                page = self.templatepage('das_check_pid', method='check_pid',
                        uinput=uinput, view=view, ahash=ahash,
                        base=self.base, pid=pid, interval=self.interval)
            else:
                page = self.get_page_content(kwargs)
                self.reqmgr.remove(pid)
        ctime = (time.time()-time0)
        return self.page(form + page, ctime=ctime)

    @expose
    def requests(self):
        """Return list of all current requests in DAS queue"""
        page = ""
        count = 0
        for row in self.reqmgr.items():
            page += '<li>%s placed at %s<br/>%s</li>' \
                % (row['_id'], row['timestamp'], row['kwds'])
            count += 1
        if page:
            page = "<ul>%s</ul>" % page
        else:
            page = "The request queue is empty"
        if count:
            page += '<div>Total: %s requests</div>' % count
        return self.page(page)

    @expose
    @checkargs(['pid', 'ahash'])
    def check_pid(self, pid, ahash):
        """
        Check status of given pid and return appropriate page content.
        This is a server callback function for ajaxCheckPid, see
        js/ajax_utils.js
        """
        cherrypy.response.headers['Cache-Control'] = 'no-cache'
        cherrypy.response.headers['Pragma'] = 'no-cache'
        img = '<img src="%s/images/loading.gif" alt="loading"/>' % self.base
        page = ''
        try:
            if self.taskmgr.is_alive(pid):
                page = img + " processing PID=%s" % pid
            else:
                kwargs = self.reqmgr.get(pid)
                if kwargs and kwargs.has_key('dasquery'):
                    del kwargs['dasquery']
                # if no kwargs (another request delete it)
                # use logging DB to look-up user request via ahash
                if not kwargs:
                    spec = {'ahash':ahash}
                    skey = [('ts', DESCENDING)]
                    res = [r for r in self.logcol.find(spec).sort(skey)]
                    kwargs = res[0]['args']
                    self.adjust_input(kwargs)
                self.reqmgr.remove(pid)
                page = self.get_page_content(kwargs)
        except Exception as err:
            msg = 'check_pid fails for pid=%s' % pid
            print dastimestamp('DAS WEB ERROR '), msg
            print_exc(err)
            self.reqmgr.remove(pid)
            self.taskmgr.remove(pid)
            return self.error(gen_error_msg({'pid':pid}), wrap=False)
        return page

    def listview(self, head, data):
        """DAS listview data representation"""
        return self.repmgr.listview(head, data)

    def tableview(self, head, data):
        """DAS tabular view data representation"""
        return self.repmgr.tableview(head, data)

    def plainview(self, head, data):
        """DAS plain view data representation"""
        return self.repmgr.plainview(head, data)

    def xmlview(self, head, data):
        """DAS XML data representation"""
        return self.repmgr.xmlview(head, data)

    def jsonview(self, head, data):
        """DAS JSON data representation"""
        return self.repmgr.jsonview(head, data)

    @exposedasjson
    @checkargs(['query', 'dbs_instance'])
    def autocomplete(self, **kwargs):
        """
        Provides autocomplete functionality for DAS web UI.
        """
        query = kwargs.get("query", "").strip()
        result = autocomplete_helper(query, self.dasmgr, self.daskeys)
        dataset = [r for r in result if r['value'].find('dataset=')!=-1]
        dbsinst = kwargs.get('dbs_instance', self.dbs_global)
        if self.dataset_daemon and len(dataset):
            # pick the DBS daemon whose url matches the requested instance
            dbs_urls = [d for d in self.dbsmgr.keys() \
                if d.find(dbsinst) != -1]
            if len(dbs_urls) == 1:
                dbsmgr = self.dbsmgr[dbs_urls[0]]
                if query.find('dataset=') != -1:
                    query = query.replace('dataset=', '')
                for row in dbsmgr.find(query):
                    result.append({'css': 'ac-info',
                                   'value': 'dataset=%s' % row,
                                   'info': 'dataset'})
        return result