def _query(query):
    """Run *query* and attach a lazy record generator to the response.

    :type query: str
    """
    response = Query(query).search()
    # Records are fetched lazily, one per recid, as the generator is consumed.
    response.records = (get_record(request)(rid) for rid in response.recids)
    return response
def _quick_match_record(obj, eng):
    """Return ``True`` if a record matching ``obj``'s data already exists.

    Tries every ``(key, search-field)`` pair from the enclosing scope's
    ``keys_to_check``; when that is empty, falls back to matching on the
    control number.

    :param obj: workflow object carrying the candidate record data.
    :param eng: workflow engine (unused; required by the task signature).
    :return: bool -- True as soon as any value yields a search hit.
    """
    keys = keys_to_check  # needed due to outer scope issues
    if not keys:
        # At least try the recid
        keys = [("control_number", "control_number")]
    from invenio_records.api import Record
    from invenio_search.api import Query
    try:
        record = Record(obj.data.dumps())
    except AttributeError:
        # obj.data is already a plain mapping without .dumps()
        record = Record(obj.data)
    for key, field in keys:
        # Single dict lookup instead of `record[key] if key in record`.
        values = record.get(key)
        if not values:
            continue
        if not isinstance(values, list):
            values = [values]
        for val in values:
            if field:
                query_string = '{0}:"{1}"'.format(field, val)
            else:
                query_string = '"{0}"'.format(val)
            result = Query(query_string).search(collection=collection)
            if len(result) > 0:
                return True
    return False
def test_query_filter(app, user_factory):
    """Test post filter."""
    urlargs = MultiDict()
    # Two term filters; only "type" is matched by the request below.
    defs = dict(
        type=terms_filter('type'),
        subtype=terms_filter('subtype'),
    )
    with app.test_request_context("?type=test"):
        q = Query("value")
        body = q.body['query']  # original query body, before filtering
        query, args = _query_filter(q, urlargs, defs)
        # A plain filter wraps the query in a "filtered" clause -- it
        # must not be recorded as a post_filter.
        assert 'post_filter' not in query.body
        assert query.body['query']['filtered']['query'] == body
        assert query.body['query']['filtered']['filter'] == \
            dict(
                bool=dict(
                    filter=[dict(terms=dict(type=['test']))]
                ),
            )
        assert args['type'] == 'test'
    with app.test_request_context("?anotertype=test"):
        # Unknown request argument: the query must pass through untouched.
        q = Query("value")
        body = q.body['query']
        query, args = _query_filter(q, urlargs, defs)
        assert query.body['query'] == body
def citations(self):
    """Citation export for single record in datatables format.

    :returns: list
        List of lists where every item represents a datatables row.
        A row consists of [reference, num_citations]
    """
    from invenio_search.api import Query
    recid = self.record['control_number']
    search = Query('refersto:' + str(recid)).search()
    # Most-cited first, with a large page so all hits come back at once.
    search.body.update({
        'sort': [{'citation_count': {'order': 'desc'}}],
        'size': 9999
    })
    return [
        [render_template_to_string("citations.html", record=citation),
         citation.get('citation_count', '')]
        for citation in search.records()
    ]
def _query(query):
    """Search *query* and attach lazily-fetched records.

    :type query: str

    ..note:: `get_record` is used so that `invenio_records` is
        kept up to date.
    """
    response = Query(query).search()
    response.records = (get_record(rid) for rid in response.recids)
    return response
def search(query, sorting=None):
    """Return a ready Holding Pen search object from query and sorting.

    :param query: Invenio query string to execute.
    :param sorting: optional dict merged into the search body (e.g. a
        ``{"sort": ...}`` clause).  ``None`` (default) leaves ordering
        untouched.  Replaces the previous mutable-default ``{}``, which
        is shared between calls in Python.
    :return: search response bound to the Holding Pen index.
    """
    results = Query(query)
    response = results.search()
    response.index = cfg["WORKFLOWS_HOLDING_PEN_INDEX"]
    if sorting:
        response.body.update(sorting)
    # FIXME pagination
    response.body["size"] = 99999999
    return response
def get_kbd_values_by_def(confdict, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    :param confdict: dictionary with keys "field", "expression"
        and "collection" name
    :param searchwith: a term to search with
    :return: list of values (empty when the config is missing/incomplete)
    """
    # Validate the configuration before doing any work (or imports).
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    expression = confdict['expression']
    collection = confdict.get('collection', "")

    from invenio_search.api import Query

    if searchwith and expression:
        if '%' in expression:
            # The expression has a placeholder: substitute the search term.
            expression = expression.replace("%", searchwith)
        else:
            # no %.. just make a combination
            expression = expression + " and " + searchwith
    elif not expression:
        # make a fake expression so that only records that have this field
        # will be returned
        fake_exp = searchwith if searchwith else "/.*/"
        expression = "{0}:{1}".format(field, fake_exp)
    # else: an expression with no searchwith -- use it as-is.
    response = Query(expression).search(collection=collection)

    # TODO wait for new search API for pagination
    response.body["size"] = 9999999
    values = []
    for record in response.records():
        value = record.get(field)
        if value:
            values.append(value)
    return values
def references(self):
    """Reference export for single record in datatables format.

    :returns: list
        List of lists where every item represents a datatables row.
        A row consists of [reference, num_citations]
    """
    from invenio_search.api import Query
    out = []
    references = self.record.get('references')
    if references:
        # Collect recids of references that resolve to records in the
        # system, so their metadata can be fetched with one search.
        refs_to_get_from_es = [
            ref['recid'] for ref in references if ref.get('recid')
        ]
        es_query = ' or '.join(
            ['control_number:' + str(recid) for recid in refs_to_get_from_es]
        )
        es_query = Query(es_query).search()
        es_query.body.update({
            'size': 9999
        })
        # Map control_number -> record for quick lookup below.
        refs_from_es = {
            record['control_number']: record for record in es_query.records()
        }
        for reference in references:
            row = []
            # Linked reference: render with the resolved record if found.
            if 'recid' in reference:
                recid = reference['recid']
                ref_record = refs_from_es.get(str(recid))
                if ref_record:
                    row.append(render_template_to_string(
                        "references.html",
                        record=ref_record,
                        reference=reference
                    ))
                    row.append(ref_record.get('citation_count', ''))
                    out.append(row)
                    continue
            # Fallback: render the raw reference, no citation count.
            row.append(render_template_to_string(
                "references.html",
                reference=reference))
            row.append('')
            out.append(row)
    return out
def test_aggregations(app, user_factory):
    """Test aggregations."""
    with app.test_request_context(""):
        query = Query("value")
        facet_defs = {
            'type': {'terms': {'field': 'upload_type'}},
            'subtype': {'terms': {'field': 'subtype'}},
        }
        # The aggregation definitions must be copied verbatim into the body.
        assert _aggregations(query, facet_defs).body['aggs'] == facet_defs
def get_record_index(record):
    """Decide which index the record should go to."""
    template = 'collection:"{collection}"'
    mapping = cfg["SEARCH_ELASTIC_COLLECTION_INDEX_MAPPING"]
    # First collection query that matches the record wins;
    # falls through to an implicit None when nothing matches.
    for collection, index in six.iteritems(mapping):
        if Query(template.format(collection=collection)).match(record):
            return index
def number_of_records(collection_name):
    """Returns number of records for the collection."""
    index = collection_to_index(collection_name)
    # An empty Query() body counts everything in the index.
    return es.count(index=index, body=Query().body)['count']
def index_collection_percolator(name, dbquery):
    """Create an elasticsearch percolator for a given query."""
    from invenio_search.api import Query
    from invenio_search.walkers.elasticsearch import ElasticSearchDSL
    # Translate the Invenio query into the ES DSL and register it
    # under the collection name as a percolator document.
    percolator_query = Query(dbquery).query.accept(ElasticSearchDSL())
    es.index(
        index='records',
        doc_type='.percolator',
        body={'query': percolator_query},
        id=name,
    )
def index_collection_percolator(name, dbquery):
    """Create an elasticsearch percolator for a given query."""
    # Register the percolator in every collection index plus the default one.
    target_indices = set(
        cfg["SEARCH_ELASTIC_COLLECTION_INDEX_MAPPING"].values())
    target_indices.add(cfg['SEARCH_ELASTIC_DEFAULT_INDEX'])
    for target in target_indices:
        es.index(
            index=target,
            doc_type='.percolator',
            body={'query': Query(dbquery).query.accept(ElasticSearchDSL())},
            id=name,
        )
def test_doublequoted_author_search(self):
    """Equivalent author-query spellings must return identical records.

    The same copy-pasted query/collect loop appeared four times; it is
    factored into a local helper so each spelling is listed once.
    """
    from invenio_search.api import Query

    def control_numbers(query_string):
        # Run the query and collect the control number of every hit.
        return [record.get("control_number")
                for record in Query(query_string).search().records()]

    results = [
        control_numbers('find a Maldacena, Juan Martin '),
        control_numbers('find a j m maldacena'),
        control_numbers('author: " j m maldacena "'),
        control_numbers('author: "j M maLDaceNa"'),
    ]
    self.assertEqual(results[0], results[1])
    self.assertEqual(results[1], results[2])
    self.assertEqual(results[2], results[3])
def requested_recids(self):
    """Search given `self.filter_pattern` and `self.filter_records`.

    :rtype: intbitset"""
    # TODO: Use self.option_consider_deleted_records when it's available
    matched = Query(self.filter_pattern or '').search().recids
    if self.filter_records is not None:
        # Restrict the hits to the explicitly requested record set.
        matched &= self.filter_records
    return matched
def _queries():
    """Preprocess collection queries.

    Builds, for every collection backed by a dbquery (hosted
    collections excluded), a mapping::

        name -> dict(query=Query(...), ancestors={ancestor names})

    where ``ancestors`` only contains ancestors that have no dbquery
    of their own.
    """
    from invenio.ext.sqlalchemy import db
    from invenio_collections.models import Collection
    return dict(
        (collection.name, dict(query=Query(
            COLLECTIONS_DELETED_RECORDS.format(dbquery=collection.dbquery)),
            ancestors=set(c.name for c in collection.ancestors
                          if c.dbquery is None)))
        for collection in Collection.query.filter(
            Collection.dbquery.isnot(None),
            db.not_(Collection.dbquery.like('hostedcollection:%'))).all())
def test_default_facets_factory(app, user_factory):
    """Test aggregations."""
    # Facet config: aggregations plus a regular filter and a post filter.
    defs = dict(
        aggs=dict(
            type=dict(terms=dict(field="upload_type"), ),
            subtype=dict(terms=dict(field="subtype"), )
        ),
        filters=dict(
            subtype=terms_filter('subtype'),
        ),
        post_filters=dict(
            type=terms_filter('type'),
        ),
    )
    app.config['RECORDS_REST_FACETS']['testidx'] = defs
    with app.test_request_context("?type=a&subtype=b"):
        # Configured index: aggs applied verbatim, the filter wraps the
        # query in a "filtered" clause and the post filter is recorded.
        q = Query("value")
        query, urlkwargs = default_facets_factory(q, 'testidx')
        assert query.body['aggs'] == defs['aggs']
        assert 'post_filter' in query.body
        assert 'filtered' in query.body['query']
        # Unconfigured index: the query must be left untouched.
        q = Query("value")
        query, urlkwargs = default_facets_factory(q, 'anotheridx')
        assert 'aggs' not in query.body
        assert 'post_filter' not in query.body
        assert 'filtered' not in query.body['query']
def render_citations(recid):
    """Citation export for single record in datatables format.

    :returns: list
        List of lists where every item represents a datatables row.
        A row consists of [reference, num_citations]
    """
    search = Query('refersto:' + str(recid)).search()
    # Most-cited first.
    search.body.update({
        'sort': [{'citation_count': {'order': 'desc'}}]
    })
    rows = []
    for citation in search.records():
        rendered = render_template_to_string(
            "citations.html", record=citation, reference=None)
        rows.append([rendered, citation.get('citation_count', '')])
    return rows
def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id",
                   verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True,
                   wl=0):
    """Search for complex pattern 'p' within field 'f' according to
       matching type 'm'.  Return hitset of recIDs.

       The function uses multi-stage searching algorithm in case of no
       exact match found.  See the Search Internals document for
       detailed description.

       The 'ap' argument governs whether an alternative patterns are to
       be used in case there is no direct hit for (p,f,m).  For
       example, whether to replace non-alphanumeric characters by
       spaces if it would give some hits.  See the Search Internals
       document for detailed description.  (ap=0 forbits the
       alternative pattern usage, ap=1 permits it.)
       'ap' is also internally used for allowing hidden tag search
       (for requests coming from webcoll, for example). In this
       case ap=-9

       The 'of' argument governs whether to print or not some
       information to the user in case of no match found.  (Usually it
       prints the information in case of HTML formats, otherwise it's
       silent).

       The 'verbose' argument controls the level of debugging information
       to be printed (0=least, 9=most).

       All the parameters are assumed to have been previously washed.

       This function is suitable as a mid-level API.
    """
    if f is None:
        # No field restriction: fall back to a full query-language search.
        from invenio_search.api import Query
        results = Query(p).search()
    else:
        # Field-restricted search goes through the dedicated search unit.
        results = search_unit(p, f, m, wl=wl)
    # Legacy entry point: warn callers that this API is deprecated.
    import warnings
    warnings.warn(
        'Deprecated search_pattern(p={0}, f={1}, m={2}) = {3}.'.format(
            p, f, m, results), stacklevel=2)
    return results
def create_update_jobs_by_collection(
        batch_template_file, collection,
        job_directory=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS):
    """Create the job description files to update a whole collection.

    @param batch_template_file: fullpath to the template for the update
    @type batch_tempalte_file: string
    @param collection: name of the collection that should be updated
    @type collection: string
    @param job_directory: fullpath to the directory storing the job files
    @type job_directory: string
    """
    from invenio_search.api import Query
    # An empty pattern restricted to the collection yields all its recids.
    collection_recids = Query().search(collection=collection)
    return create_update_jobs_by_recids(collection_recids,
                                        batch_template_file,
                                        job_directory)
def get_unique_record_json(param):
    """API to query records from the database."""
    from .api import get_record
    from invenio_search.api import Query
    data = {'status': 'notfound'}
    query = {}
    hits = Query(param).search()
    if len(hits) == 1:
        # Exactly one hit: return its cleaned record dump.
        query = get_record(hits[0]).dumps(clean=True)
        data['status'] = 'success'
    elif len(hits) > 1:
        data['status'] = 'multiplefound'
    data['source'] = 'database'
    data['query'] = query
    return data
def create_update_jobs_by_search(pattern, batch_template_file,
                                 job_directory=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS
                                 ):
    """Create job description files for all records matching a pattern.

    Be aware of the search limitations!

    @param search_pattern: The pattern to search for
    @type search_pattern: string
    @param batch_template_file: fullpath to the template for the update
    @type batch_tempalte_file: string
    @param job_directory: fullpath to the directory storing the job files
    @type job_directory: string
    """
    from invenio_search.api import Query
    matching_recids = Query(pattern).search()
    return create_update_jobs_by_recids(matching_recids,
                                        batch_template_file,
                                        job_directory)
def link_to_hep_affiliation(record):
    """Return an "<N> Paper(s) from <ICN>" summary for an institution.

    :param record: institution record; must contain an 'ICN' value for
        a non-empty result.
    :return: summary string, or '' when the record has no ICN or no
        papers reference the affiliation.
    """
    try:
        icn = record['ICN']
    except KeyError:
        return ''
    result = es.search(body=Query("affiliation:%s" % icn).body)
    hits = result['hits']['hits']
    if not hits:
        return ''
    # Single formatting path; only the noun differs with the count.
    noun = 'Paper' if len(hits) == 1 else 'Papers'
    return '{0} {1} from {2}'.format(len(hits), noun, icn)
def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="",
                           rg=None, sf="", so="a", sp="", rm="", of="id",
                           ot="", aas=0, p1="", f1="", m1="", op1="", p2="",
                           f2="", m2="", op2="", p3="", f3="", m3="", sc=0,
                           jrec=0, recid=-1, recidb=-1, sysno="", id=-1,
                           idb=-1, sysnb="", action="", d1="", d1y=0, d1m=0,
                           d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="",
                           verbose=0, ap=0, ln=CFG_SITE_LANG, ec=None,
                           tab="", wl=0, em=""):
    """Deprecated legacy search entry point.

    Washes the legacy WebSearch argument set, folds the advanced-search
    patterns (``p1``..``p3`` with their fields, matching types and
    operators) into the main pattern ``p``, and delegates the search to
    :class:`invenio_search.api.Query` restricted to collection ``cc``.
    """
    # Normalise/validate the legacy arguments (only used here for the
    # deprecation message below).
    kwargs = prs_wash_arguments(req=req, cc=cc, c=c, p=p, f=f, rg=rg, sf=sf,
                                so=so, sp=sp, rm=rm, of=of, ot=ot, aas=aas,
                                p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2,
                                m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, sc=sc,
                                jrec=jrec, recid=recid, recidb=recidb,
                                sysno=sysno, id=id, idb=idb, sysnb=sysnb,
                                action=action, d1=d1, d1y=d1y, d1m=d1m,
                                d1d=d1d, d2=d2, d2y=d2y, d2m=d2m, d2d=d2d,
                                dt=dt, verbose=verbose, ap=ap, ln=ln, ec=ec,
                                tab=tab, wl=wl, em=em)
    import warnings
    warnings.warn('Deprecated perform_request_search({}).'.format(str(kwargs)),
                  stacklevel=2)
    from invenio_search.api import Query
    # Merge the advanced-search boxes into a single query pattern.
    p = create_add_to_search_pattern(p, p1, f1, m1, "")
    p = create_add_to_search_pattern(p, p2, f2, m2, op1)
    p = create_add_to_search_pattern(p, p3, f3, m3, op2)
    return Query(p).search(collection=cc)
def get_kbd_values_by_def(confdict, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    :param confdict: dictionary with keys "field", "expression"
        and "collection" name
    :param searchwith: a term to search with
    :return: list of values
    """
    from invenio_search.api import Query

    # Nothing to do without a configuration naming the field.
    if not confdict:
        return []
    if 'field' not in confdict:
        return []
    field = confdict['field']
    expression = confdict['expression']
    collection = ""
    if 'collection' in confdict:
        collection = confdict['collection']

    # Build the effective search expression, then run a single search.
    if searchwith and expression:
        if expression.count('%') > 0:
            search_expr = expression.replace("%", searchwith)
        else:
            # no %.. just make a combination
            search_expr = expression + " and " + searchwith
    elif expression:
        # either no expr or no searchwith.. but never mind about searchwith
        search_expr = expression
    else:
        # make a fake expression so that only records that have this field
        # will be returned
        fake_exp = "/.*/"
        if searchwith:
            fake_exp = searchwith
        search_expr = "{0}:{1}".format(field, fake_exp)
    response = Query(search_expr).search(collection=collection)

    # TODO wait for new search API for pagination
    response.body["size"] = 9999999
    values = []
    for record in response.records():
        value = record.get(field)
        if value:
            values.append(value)
    return values
def test_default_sorter_factory(app, user_factory):
    """Test default sorter factory.

    Covers explicit ``?sort=`` arguments (valid, reversed, invalid)
    and the per-index defaults applied with and without a query string.
    """
    app.config["RECORDS_REST_SORT_OPTIONS"] = dict(
        myindex=dict(myfield=dict(fields=['field1', '-field2'], )),
    )
    app.config["RECORDS_REST_DEFAULT_SORT"] = dict(myindex=dict(
        query='-myfield',
        noquery='myfield',
    ), )
    # Sort
    with app.test_request_context("?sort=myfield"):
        query, urlargs = default_sorter_factory(Query("value"), 'myindex')
        assert query.body['sort'] == \
            [{'field1': {'order': 'asc'}}, {'field2': {'order': 'desc'}}]
        assert urlargs['sort'] == 'myfield'
    # Reverse sort (leading '-' flips each configured field's order)
    with app.test_request_context("?sort=-myfield"):
        query, urlargs = default_sorter_factory(Query("value"), 'myindex')
        assert query.body['sort'] == \
            [{'field1': {'order': 'desc'}}, {'field2': {'order': 'asc'}}]
        assert urlargs['sort'] == '-myfield'
    # Invalid sort key -- ignored, no sort applied
    with app.test_request_context("?sort=invalid"):
        query, urlargs = default_sorter_factory(Query("value"), 'myindex')
        assert 'sort' not in query.body
        assert urlargs == {}
    # Default sort without query -- uses the 'noquery' default
    with app.test_request_context("/?q="):
        query, urlargs = default_sorter_factory(Query("value"), 'myindex')
        assert query.body['sort'] == \
            [{'field1': {'order': 'asc'}}, {'field2': {'order': 'desc'}}]
        assert urlargs == dict(sort='myfield')
    # Default sort with query -- uses the 'query' default
    with app.test_request_context("/?q=test"):
        query, urlargs = default_sorter_factory(Query("value"), 'myindex')
        assert query.body['sort'] == \
            [{'field1': {'order': 'desc'}}, {'field2': {'order': 'asc'}}]
        assert urlargs == dict(sort='-myfield')
    # Default sort another index -- no config, no sort applied
    with app.test_request_context("/?q=test"):
        query, urlargs = default_sorter_factory(Query("value"), 'aidx')
        assert 'sort' not in query.body
def number_of_search_results(query, collection_name):
    """Filter used to show total number of results out of filtered ones."""
    session_key = 'last-query' + query + collection_name
    cached = session.get(session_key)
    if cached:
        age = (datetime.datetime.utcnow() -
               cached['timestamp']).total_seconds()
        if age < 300:
            # Only use the session value if it is newer than 5 minutes
            # This should allow for the common use case of navigating
            # facets and avoid using an outdated value when using a direct
            # link
            return cached["number_of_hits"]
    # Cache miss or stale: count against the collection's index.
    return es.count(index=collection_to_index(collection_name),
                    body=Query(query).body)['count']
def generate_booktitle(record):
    """Build a booktitle string from a record's ``publication_info``.

    Preference order:
    1. report number plus its acronym, or -- when no acronym is
       present -- the title of the record the report number resolves to;
    2. the ``pubinfo_freetext`` entries joined with commas.

    :param record: record dict (``publication_info`` may be absent).
    :return: booktitle string, '' when nothing could be derived.
    """
    booktitle = ''
    pubinfo = record.get('publication_info', '')
    for info in pubinfo:
        if 'reportnumber' not in info:
            continue
        rn = info['reportnumber']
        if not rn:
            continue
        # BUGFIX: use .get() -- entries carrying a report number but no
        # acronym previously raised KeyError here.
        acronym = info.get('acronym')
        if acronym:
            booktitle = "%s: %s" % (rn, acronym, )
        else:
            records = Query("reportnumber:%s" % (rn, )).search().records()
            if records:
                rec = records[0]
                # .get() guards against resolved records without titles.
                for title in rec.get('titles', []):
                    booktitle = title.get('title', "")
                    if title.get('subtitle'):
                        booktitle += ': ' + title.get('subtitle')
    if not booktitle:
        # Fall back to free-text publication info.
        freetexts = [info['pubinfo_freetext'] for info in pubinfo
                     if 'pubinfo_freetext' in info]
        if freetexts:
            if any(isinstance(item, list) for item in freetexts):
                nested_list = list(traverse(freetexts))
                booktitle = ', '.join(str(title) for title in nested_list)
            else:
                booktitle = ', '.join(str(title) for title in freetexts)
    return booktitle
def proceedings_link(record):
    """Render HTML link(s) to the proceedings record(s) for a conference."""
    cnum = record.get('cnum', '')
    if not cnum:
        return ''
    search_body = Query("cnum:%s and 980__a:proceedings" % (cnum,)).body
    hits = es.search(body=search_body)['hits']['hits']
    if not hits:
        return ''
    if len(hits) == 1:
        # Single proceedings record: one plain link.
        return '<a href="/record/{recid}">Proceedings</a>'.format(
            recid=hits[0]['control_number'])
    # Several proceedings: enumerate them, with DOI when available.
    proceedings = []
    for i, hit in enumerate(hits, start=1):
        try:
            dois = hit['dois']
            proceedings.append(
                '<a href="/record/{recid}">#{i}</a> (DOI: <a '
                'href="http://dx.doi.org/{doi}">{doi}</a>'.format(
                    recid=hit['control_number'],
                    doi=dois[0]['value'],
                    i=i))
        except KeyError:
            # Guards both against records not having a "dois" field
            # and doi values not having a "value" field.
            proceedings.append(
                '<a href="/record/{recid}">#{i}</a>'.format(
                    recid=hit['control_number'], i=i))
    return 'Proceedings: ' + ', '.join(proceedings)
def record_brief():
    """Render the brief HTML view of the first five matching records."""
    from invenio_search.api import Query
    hits = Query(request.args['query']).search().records()[:5]
    rendered = [
        render_template('format/record/Default_HTML_brief.tpl', record=hit)
        for hit in hits
    ]
    return ''.join(rendered)
def test_citation_count_after_insert(self):
    """Citation counts must match the expected values after insertion."""
    for recid in self.expected_citation_counts_after_insert:
        # Re-fetch each cited record and read back its citation count.
        cited = Query('control_number:' + str(recid)).search().records()[0]
        self.citation_counts_after_insert[recid] = cited.get('citation_count')
    self.assertEqual(self.expected_citation_counts_after_insert,
                     self.citation_counts_after_insert)
def serialize(self, pid, record, links_factory=None):
    """Serialize a single impact graph from a record.

    :param pid: Persistent identifier instance.
    :param record: Record instance.
    :param links_factory: Factory function for the link generation,
        which are added to the response.
    :return: JSON string with the record's identity, its citations and
        its resolved references.
    """
    out = {}
    # Add information about current record
    out['inspire_id'] = record['control_number']
    out['title'] = get_title(record)
    out['year'] = record['earliest_date'].split('-')[0]
    # Get citations: records referring to this one, via a 'refersto'
    # search (capped at 9999 hits, minimal _source fields).
    citations = []
    es_query = Query('refersto:' + record['control_number'])
    es_query.body.update({'size': 9999})
    record_citations = current_search_client.search(
        index='records-hep',
        doc_type='hep',
        body=es_query.body,
        _source=[
            'control_number',
            'citation_count',
            'titles',
            'earliest_date'
        ]
    )['hits']['hits']
    for citation in record_citations:
        citation = citation['_source']
        citations.append({
            "inspire_id": citation['control_number'],
            "citation_count": citation.get('citation_count', 0),
            "title": get_title(citation),
            "year": citation['earliest_date'].split('-')[0]
        })
    out['citations'] = citations
    # Get references: resolve the recids listed in the record's own
    # 'references' field with a single multi-get.
    record_references = record.get('references', [])
    references = []
    reference_recids = [
        ref['recid'] for ref in record_references if ref.get('recid')
    ]
    if reference_recids:
        mget_body = {"ids": reference_recids}
        record_references = current_search_client.mget(
            index='records-hep',
            doc_type='hep',
            body=mget_body,
            _source=[
                'control_number',
                'citation_count',
                'titles',
                'earliest_date'
            ])
        for reference in record_references["docs"]:
            ref_info = reference["_source"]
            references.append({
                "inspire_id": ref_info['control_number'],
                "citation_count": ref_info.get('citation_count', 0),
                "title": get_title(ref_info),
                "year": ref_info['earliest_date'].split('-')[0]
            })
    out['references'] = references
    return json.dumps(out)