def records_query(document_id, state):
    """Build the search body for the records of one document.

    Rows named by the ``row`` argument are added as an extra ``should``
    clause on the ``index`` field with a boost of 1000. If any ``row``
    value cannot be converted to an integer, the whole row selection is
    ignored.

    :param document_id: passed through to ``records_query_internal``.
    :param state: query state; ``getlist('row')``, ``limit``, ``filters``
        and ``offset`` are read here, and it is handed to
        ``records_query_shoulds``.
    :returns: the body produced by ``records_query_internal`` with the
        state filters applied to ``query`` and ``from`` set to the offset.
    """
    try:
        rows = [int(r) for r in state.getlist('row')]
    except (TypeError, ValueError):
        # A malformed row number drops the row selection instead of failing
        # the request; other errors are no longer silently swallowed.
        rows = []

    shoulds = records_query_shoulds(state)
    if not shoulds:
        shoulds = [match_all()]

    if rows:
        shoulds.append({
            "constant_score": {
                "filter": {'terms': {'index': rows}},
                "boost": 1000
            }
        })

    query = records_query_internal(document_id, shoulds, size=state.limit)
    query['query'] = filter_query(query['query'], state.filters)
    query['from'] = state.offset
    # NOTE(review): no 'sort' is set on the body. This function used to build
    # [{'index': 'asc'}, {'page': 'asc'}] (with '_score' first when the query
    # had text or rows) but never attached it to the query, so it had no
    # effect. Confirm whether an explicit ordering is wanted before adding it.
    return query
def entities_query(args, fields=None, facets=True):
    """Compose the search body for an entity search from request arguments.

    The body is only built and returned here; nothing is executed.

    :param args: request arguments; wrapped in a ``MultiDict`` when it is
        not one already. The ``q``, ``facet`` and ``sort`` keys are read.
    :param fields: value for ``_source``; ``DEFAULT_FIELDS`` when falsy.
    :param facets: when false, no facet aggregations are added and only the
        empty ``scoped`` skeleton is returned under ``aggregations``.
    :returns: dict with ``sort``, ``query``, ``aggregations`` and
        ``_source`` keys.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)

    text = args.get('q', '').strip()
    if not text:
        q = match_all()
    else:
        q = {
            "query_string": {
                "query": text,
                "fields": [
                    'name^15', 'name_latin^5',
                    'terms^12', 'terms_latin^3',
                    'summary^10', 'summary_latin^7',
                    'description^5', 'description_latin^3'
                ],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }

    q = authz_filter(q)
    filters = parse_filters(args)

    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facet_names = args.getlist('facet')
        if 'collections' in facet_names:
            # 'collections' is handled by its own helper and must not be
            # passed on to the generic aggregation step.
            aggs = facet_collections(q, aggs, filters)
            facet_names.remove('collections')
        aggs = aggregate(q, aggs, facet_names)

    sort_options = {
        'doc_count': [{'doc_count': 'desc'}, '_score'],
        'alphabet': [{'name': 'asc'}, '_score'],
        'score': ['_score'],
    }
    default_sort = 'score' if text else 'doc_count'
    sort_mode = args.get('sort', '').strip().lower()
    # An empty or unrecognised mode falls back to the default; previously an
    # unknown value left ``sort`` unbound and raised UnboundLocalError.
    sort = sort_options.get(sort_mode) or sort_options[default_sort]

    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
def text_query(text):
    """Return the query clause used to find ``text``.

    ``None`` or whitespace-only text gives ``match_all()``. Otherwise the
    result is a ``bool`` query in which at least one of two ``should``
    clauses must match: ``meta_query_string(text)``, or ``child_record``
    applied to a ``bool`` query around ``text_query_string(text)``.
    """
    if text is None or not text.strip():
        return match_all()

    meta_clause = meta_query_string(text)
    record_clause = child_record({
        "bool": {"should": [text_query_string(text)]}
    })
    return {
        "bool": {
            "minimum_should_match": 1,
            "should": [meta_clause, record_clause]
        }
    }
def text_query(text):
    """Build the text-matching part of a query.

    Falls back to ``match_all()`` when ``text`` is ``None`` or blank. For
    real text, a ``bool`` query is returned that needs at least one of its
    ``should`` clauses to match: the ``meta_query_string`` clause, or the
    ``text_query_string`` clause wrapped by ``child_record``.
    """
    stripped = "" if text is None else text.strip()
    if len(stripped) == 0:
        return match_all()

    alternatives = [meta_query_string(text)]
    inner = {"bool": {"should": [text_query_string(text)]}}
    alternatives.append(child_record(inner))
    return {"bool": {"minimum_should_match": 1, "should": alternatives}}
def entities_query(args, fields=None, facets=True):
    """Parse the user's request arguments and compose an entity search body.

    This function only assembles the body; it does not run the search.

    :param args: request arguments (``q``, ``facet``, ``sort``); converted
        to a ``MultiDict`` if it is some other mapping.
    :param fields: ``_source`` selection, defaulting to ``DEFAULT_FIELDS``.
    :param facets: set to a false value to skip facet aggregations.
    :returns: dict with the keys ``sort``, ``query``, ``aggregations`` and
        ``_source``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)

    text = args.get('q', '').strip()
    if text:
        q = {
            "query_string": {
                "query": text,
                "fields": ['name^15', 'name_latin^5',
                           'terms^12', 'terms_latin^3',
                           'summary^10', 'summary_latin^7',
                           'description^5', 'description_latin^3'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }
    else:
        q = match_all()

    q = authz_filter(q)
    filters = parse_filters(args)

    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        requested = args.getlist('facet')
        if 'collections' in requested:
            # The collections facet has a dedicated builder; take it out of
            # the list before the generic aggregation runs.
            aggs = facet_collections(q, aggs, filters)
            requested.remove('collections')
        aggs = aggregate(q, aggs, requested)

    sort_mode = args.get('sort', '').strip().lower()
    if sort_mode not in ('doc_count', 'alphabet', 'score'):
        # Missing or unknown modes use the default. An unknown mode used to
        # leave ``sort`` undefined and crash with UnboundLocalError.
        sort_mode = 'score' if text else 'doc_count'

    if sort_mode == 'doc_count':
        sort = [{'doc_count': 'desc'}, '_score']
    elif sort_mode == 'alphabet':
        sort = [{'name': 'asc'}, '_score']
    else:
        sort = ['_score']

    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
def links_query(origin, state):
    """Search links whose ``origin.id`` matches ``origin`` and return them.

    The query is restricted to the ids in ``origin['ids']`` (or the single
    ``origin['id']`` when that is missing or empty), passed through
    ``authz_filter`` and run via ``execute_basic`` for ``TYPE_LINK``.

    :param origin: mapping that provides ``ids`` and/or ``id``.
    :param state: query state supplying text, authz, facets, sort, filters,
        limit and offset.
    :returns: the output dict from ``execute_basic`` with ``facets`` set and
        one entry appended to ``results`` per hit, each carrying ``id`` and
        ``score``.
    """
    if not state.has_text:
        q = match_all()
    else:
        q = {
            "query_string": {
                "query": state.text,
                "fields": ['name^5', 'names^2', 'text'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }

    origin_ids = origin.get('ids') or [origin.get('id')]
    q = add_filter(q, {'terms': {'origin.id': origin_ids}})
    q = authz_filter(q, state.authz, roles=True)

    empty_aggs = {'scoped': {'global': {}, 'aggs': {}}}
    aggs = aggregate(state, q, empty_aggs, state.facet_names)

    by_date = [
        {'properties.start_date': 'desc'},
        {'properties.end_date': 'desc'}
    ]
    sort = ['_score'] if state.sort == 'score' else by_date

    body = {
        'sort': sort,
        'query': filter_query(q, state.filters),
        'aggregations': aggs,
        'size': state.limit,
        'from': state.offset,
        '_source': DEFAULT_FIELDS
    }
    result, hits, output = execute_basic(TYPE_LINK, body)
    output['facets'] = parse_facet_result(state, result)

    for hit in hits.get('hits', []):
        link = hit.get('_source')
        link['id'] = hit.get('_id')
        link['score'] = hit.get('_score')
        output['results'].append(link)
    return output
def tabular_query(document_id, sheet, args):
    """Build the search body for the rows of one sheet of a document.

    :param document_id: value for the ``document_id`` term filter.
    :param sheet: value for the ``sheet`` term filter.
    :param args: request arguments; ``q`` is the search text and ``row``
        lists row ids to boost. If the row list cannot be read as integers
        it is ignored.
    :returns: dict with ``from`` 0, ``size`` 100, the filtered ``query``,
        a ``sort`` on ``row_id`` (preceded by ``_score`` when text or rows
        were given) and the ``_source`` field list.
    """
    query = match_all()
    text = args.get('q', '').strip()
    has_text = len(text) > 0
    if has_text:
        query = text_query_string(text)

    try:
        row_ids = [int(value) for value in args.getlist('row')]
    except Exception:
        row_ids = []

    if row_ids:
        # Requested rows are boosted rather than required.
        boosted_rows = {
            "constant_score": {
                "filter": {'terms': {'row_id': row_ids}},
                "boost": 1000
            }
        }
        query = {"bool": {"must": [query], "should": [boosted_rows]}}

    for clause in ({'term': {'document_id': document_id}},
                   {'term': {'sheet': sheet}}):
        query = add_filter(query, clause)

    if has_text or row_ids:
        sort = ['_score', {'row_id': 'asc'}]
    else:
        sort = [{'row_id': 'asc'}]

    return {
        'from': 0,
        'size': 100,
        'query': query,
        'sort': sort,
        '_source': ['document_id', 'sheet', 'row_id', 'raw']
    }
def text_query(text):
    """
    Construct the part of a query that is responsible for finding a piece
    of text.

    Returns ``match_all()`` for ``None`` or blank input; otherwise a
    ``bool`` query with two ``should`` alternatives (``meta_query_string``
    and a ``child_record`` around ``text_query_string``), at least one of
    which must match.
    """
    has_text = text is not None and len(text.strip()) > 0
    if has_text:
        by_meta = meta_query_string(text)
        by_record = child_record({
            "bool": {
                "should": [text_query_string(text)]
            }
        })
        return {
            "bool": {
                "minimum_should_match": 1,
                "should": [by_meta, by_record]
            }
        }
    return match_all()
def tabular_query(document_id, sheet, args):
    """Compose the row search for a single sheet of a tabular document.

    The ``q`` argument becomes a text query and the ``row`` arguments become
    a boosted ``should`` clause on ``row_id``; an unreadable row list is
    treated as empty. Results are limited to the given ``document_id`` and
    ``sheet``.

    :returns: search body with ``from`` 0, ``size`` 100, ``query``,
        ``sort`` (``row_id`` ascending, after ``_score`` when the query is
        scored) and ``_source``.
    """
    q = match_all()
    needle = args.get('q', '').strip()
    scored = bool(needle)
    if scored:
        q = text_query_string(needle)

    try:
        rows = list(map(int, args.getlist('row')))
    except Exception:
        rows = []

    if rows:
        scored = True
        row_boost = {
            "constant_score": {
                "filter": {'terms': {'row_id': rows}},
                "boost": 1000
            }
        }
        q = {"bool": {"must": q, "should": row_boost}}

    q = add_filter(q, {'term': {'document_id': document_id}})
    q = add_filter(q, {'term': {'sheet': sheet}})

    sort = [{'row_id': 'asc'}]
    if scored:
        sort = ['_score'] + sort

    return {
        'from': 0,
        'size': 100,
        'query': q,
        'sort': sort,
        '_source': ['document_id', 'sheet', 'row_id', 'raw']
    }
def entities_query(state, fields=None, facets=True, doc_counts=False):
    """Parse a user query, compose and execute an entity search.

    :param state: query state supplying text, raw query, authz, facets,
        sort, filters, limit and offset.
    :param fields: ``_source`` fields to fetch; ``DEFAULT_FIELDS`` if falsy.
    :param facets: when false, no facet aggregations are added.
    :param doc_counts: when true, a second multi-search request is issued
        and each result gets a ``doc_count`` taken from ``hits.total`` of
        its sub-query.
    :returns: the output dict from ``execute_basic`` with ``facets`` set and
        one entry appended to ``results`` per hit (with ``id``, ``score``
        and ``api_url`` added).
    """
    if state.has_text:
        q = {
            "query_string": {
                "query": state.text,
                "fields": ['name^5', 'names^2', 'text'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }
    else:
        q = match_all()

    if state.raw_query:
        q = {"bool": {"must": [q, state.raw_query]}}

    q = authz_filter(q, state.authz, roles=True)

    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        # Work on a copy so removing 'collections' does not alter the state.
        facet_names = list(state.facet_names)
        if 'collections' in facet_names:
            aggs = facet_collections(state, q, aggs)
            facet_names.remove('collections')
        aggs = aggregate(state, q, aggs, facet_names)

    if state.sort == 'doc_count':
        sort = [{'doc_count': 'desc'}, '_score']
    elif state.sort == 'score':
        sort = ['_score', {'name_sort': 'asc'}]
    else:
        sort = [{'name_sort': 'asc'}]

    q = {
        'sort': sort,
        'query': filter_query(q, state.filters),
        'aggregations': aggs,
        'size': state.limit,
        'from': state.offset,
        '_source': fields or DEFAULT_FIELDS
    }
    result, hits, output = execute_basic(TYPE_ENTITY, q)
    output['facets'] = parse_facet_result(state, result)

    entity_ids = []
    for doc in hits.get('hits', []):
        entity = doc.get('_source')
        entity['id'] = doc.get('_id')
        entity['score'] = doc.get('_score')
        entity['api_url'] = url_for('entities_api.view', id=doc.get('_id'))
        output['results'].append(entity)
        entity_ids.append(entity['id'])

    if doc_counts and entity_ids:
        # Get the number of matching documents for each entity. The
        # sub-queries are only built when counts were actually requested.
        # Each sub-query is an (empty) header line followed by a body line.
        header = json.dumps({})
        lines = []
        for entity_id in entity_ids:
            sq = {'term': {'entities.id': entity_id}}
            sq = add_filter(sq, {
                'terms': {'collection_id': state.authz.collections_read}
            })
            lines.append(header)
            lines.append(json.dumps({'size': 0, 'query': sq}))

        body = '\n'.join(lines)
        res = es.msearch(index=es_index, doc_type=TYPE_DOCUMENT, body=body)
        for entity, response in zip(output['results'], res.get('responses')):
            entity['doc_count'] = response.get('hits', {}).get('total')

    return output