def get_similar(self, **kwargs):
    """
    Returns the response of the "similar documents" query for this record.

    The query selects this record by its document id field; any keyword
    arguments are handed on unchanged to ``get_document_similar``.
    """
    # imported at call time, as in the original implementation
    from adsabs.core.solr import get_document_similar

    id_field = config.SOLR_DOCUMENT_ID_FIELD
    query = "%s:%s" % (id_field, self.data[id_field])
    with statsd.timer("core.solr.similar.query_response_time"):
        return get_document_similar(query, **kwargs)
def get_citations(self, **kwargs):
    """
    Returns the solr response for the "citations(...)" query on this record.

    Keyword arguments are handed on unchanged to ``solr.query``.
    """
    id_field = config.SOLR_DOCUMENT_ID_FIELD
    query = "citations(%s:%s)" % (id_field, self.data[id_field])
    with statsd.timer("core.solr.citations.query_response_time"):
        return solr.query(query, **kwargs)
def get_document(identifier, **kwargs):
    """
    Looks up a single document by identifier.

    Runs an ``identifier:<identifier>`` query limited to one row and the
    default search fields; extra keyword arguments go to ``solr.query``.

    Returns the first document object when the query reports exactly one
    hit, otherwise None.
    """
    query = "identifier:%s" % identifier
    with statsd.timer("core.solr.document.query_response_time"):
        response = solr.query(
            query,
            rows=1,
            fields=config.SOLR_SEARCH_DEFAULT_FIELDS,
            **kwargs
        )

    # anything other than exactly one hit counts as "not found"
    if response.get_hits() != 1:
        return None
    return response.get_doc_object(0)
def get_toc(self, **kwargs):
    """
    Returns the table of contents.

    Queries SOLR for every bibcode that starts with the first 13
    characters of this record's bibcode (a trailing "*" wildcard is
    appended). If the 14th character is an "E", it is included in the
    prefix as well.

    Keyword arguments are handed on unchanged to ``solr.query``.
    """
    bibcode = self.bibcode
    # Compare a one-character slice instead of indexing: a bibcode shorter
    # than 14 characters then simply does not match 'E' rather than
    # raising IndexError.
    prefix_length = 14 if bibcode[13:14] == 'E' else 13
    q = "bibcode:%s*" % bibcode[:prefix_length]
    with statsd.timer("core.solr.toc.query_response_time"):
        resp = solr.query(q, **kwargs)
    return resp
def get_suggestions(**args):
    """
    Suggest papers related to a list of bibcodes.

    Candidates are the papers returned by ``get_citing_papers`` and
    ``get_references`` for the input list, minus the input papers
    themselves. They are ranked by how often they occur (multiplicity is
    kept on purpose) and only those whose frequency is above
    ``config.BIBUTILS_THRESHOLD_FREQUENCY`` and below the number of input
    bibcodes are retained.

    Keyword arguments:
        bibcodes -- list of bibcode strings; missing, None or empty input
                    yields an empty result
        Nsuggest -- maximum number of suggestions
                    (default: config.BIBUTILS_DEFAULT_SUGGESTIONS)
        fmt      -- 'score' puts the frequency in the 'score' field, any
                    other value sets it to 'NA'
                    (default: config.BIBUTILS_DEFAULT_FORMAT)

    Returns a list of dicts with the keys 'bibcode', 'score', 'title' and
    'author'. Candidates for which ``get_meta_data`` returned no entry are
    left out.
    """
    bibcodes = args.get('bibcodes') or []
    # Nothing to do without input. Checked before the timer is started so
    # that no timer is left running on this path.
    if not bibcodes:
        return []

    timer = statsd.timer("bibutils.get_suggestions.generate_time")
    timer.start()

    # Any overrides for default values?
    Nsuggestions = args.get('Nsuggest', config.BIBUTILS_DEFAULT_SUGGESTIONS)
    output_format = args.get('fmt', config.BIBUTILS_DEFAULT_FORMAT)

    # get rid of potential trailing spaces and cap the amount of input
    bibcodes = [b.strip() for b in bibcodes][:config.BIBUTILS_MAX_INPUT]
    known = set(bibcodes)

    # citations and references for all publications, dropping empty entries
    # (keeping multiplicity is essential)
    cits = [c for c in get_citing_papers(bibcodes=bibcodes) if len(c) > 0]
    refs = [r for r in get_references(bibcodes=bibcodes) if len(r) > 0]

    # remove papers from the original list to get candidates
    papers = [p for p in cits + refs if p not in known]

    # establish frequencies of papers in results
    paperFreq = [(k, len(list(g))) for k, g in groupby(sorted(papers))]
    # most frequent first; the sort is stable, so ties stay in bibcode order
    paperFreq = sorted(paperFreq, key=operator.itemgetter(1), reverse=True)
    # keep only frequencies above the threshold and below the input size
    paperFreq = [
        entry for entry in paperFreq
        if config.BIBUTILS_THRESHOLD_FREQUENCY < entry[1] < len(bibcodes)
    ]

    top = paperFreq[:Nsuggestions]
    # get metadata for suggestions
    meta_dict = get_meta_data(results=top)
    timer.stop()

    # return results in required format
    with_score = output_format == 'score'
    return [
        {
            'bibcode': bibcode,
            'score': freq if with_score else 'NA',
            'title': meta_dict[bibcode]['title'],
            'author': meta_dict[bibcode]['author'],
        }
        for (bibcode, freq) in top
        if bibcode in meta_dict
    ]
def generate_metrics(**args):
    """
    Compute metrics for a set of publications.

    The keyword arguments are passed as a dict to ``get_attributes``, which
    supplies the per-publication attributes and citation counts. One model
    object per requested type is filled with that data and handed to
    ``generate_data`` through a worker pool; the combined output is shaped
    by ``format_results``.

    Keyword arguments read here:
        fmt   -- output format: 'legacy' returns ``legacy_format(results)``,
                 'API' returns the results with spaces in keys replaced by
                 underscores, anything else returns the results unchanged
        types -- comma-separated model types
                 (default: config.METRICS_DEFAULT_MODELS)
    """
    timer = statsd.timer("bibutils.generate_metrics.generate_time")
    timer.start()
    # First we gather the necessary 'attributes' for all publications involved
    attr_list, num_cit, num_cit_ref = get_attributes(args)
    # Determine the output format (really only used to get the 'legacy format')
    output_format = args.get('fmt', '')
    # What types of metrics are we gathering (everything by default)
    model_types = args.get('types', config.METRICS_DEFAULT_MODELS)
    # Instantiate the metrics classes, defined in the 'models' module
    stats_models = []
    for model_class in metricsmodels.data_models(models=model_types.split(',')):
        model_class.attributes = attr_list
        model_class.num_citing = num_cit
        model_class.num_citing_ref = num_cit_ref
        model_class.results = {}
        stats_models.append(model_class)
    # The metrics calculations are sent off in parallel. The pool is always
    # closed and joined so its workers do not outlive this call.
    po = Pool()
    try:
        rez = po.map_async(generate_data, stats_models)
        model_results = rez.get()
    finally:
        po.close()
        po.join()
    # Now shape the results in the final format
    results = format_results(model_results)
    timer.stop()
    # Send the result back to our caller
    if output_format == 'legacy':
        return legacy_format(results)
    if output_format == 'API':
        # Iterate over snapshots of the keys: the dicts are modified while
        # being walked.
        for key in list(results.keys()):
            newkey = key.replace(' ', '_')
            results[newkey] = results.pop(key)
            # NOTE(review): this is only false when the key contains BOTH
            # 'histogram' and 'series'; 'and' may have been intended.
            # Behaviour left as is -- confirm with the API consumers.
            if 'histogram' not in key or 'series' not in key:
                section = results[newkey]
                for kee in list(section.keys()):
                    section[kee.replace(' ', '_')] = section.pop(kee)
    return results
def search(): """ returns the results of a search """ if not len(request.values): form = QueryForm(csrf_enabled=False) # prefill the database select menu option form.db_f.default = config.SEARCH_DEFAULT_DATABASE else: form = QueryForm.init_with_defaults(request.values) if form.validate(): query_components = QueryBuilderSearch.build(form, request.values) bigquery_id = request.values.get('bigquery') try: req = solr.create_request(**query_components) url = None if bigquery_id: prepare_bigquery_request(req, request.values['bigquery']) url = config.SOLRBIGQUERY_URL req = solr.set_defaults(req, query_url=url) with statsd.timer("search.solr.query_response_time"): resp = solr.get_response(req) statsd.incr("search.solr.executed") if bigquery_id: facets = resp.get_facet_parameters() facets.append(('bigquery', bigquery_id)) except Exception, e: statsd.incr("search.solr.failed") raise AdsabsSolrqueryException("Error communicating with search service", sys.exc_info()) if resp.is_error(): statsd.incr("search.solr.error") flash(resp.get_error_message(), 'error') return render_template('search_results.html', resp=resp, form=form, query_components=query_components, bigquery_id=bigquery_id) else:
def set_statsd_context():
    """
    Stores the statsd context name for the current request on ``g`` and
    starts the timer for the total response time.

    The context name is "<endpoint>.<method>" of the current request; the
    timer is kept on ``g.total_request_timer``.
    """
    context = "%s.%s" % (request.endpoint, request.method)
    g.statsd_context = context

    request_timer = statsd.timer(context + ".response_time")
    g.total_request_timer = request_timer
    request_timer.start()
def get_coreads(self, **kwargs):
    """
    Returns the solr response for the 'trending' 2nd order operator applied
    to this record.

    Keyword arguments are handed on unchanged to ``solr.query``.
    """
    id_field = config.SOLR_DOCUMENT_ID_FIELD
    query = "trending(%s:%s)" % (id_field, self.data[id_field])
    with statsd.timer("core.solr.coreads.query_response_time"):
        return solr.query(query, **kwargs)