Пример #1
0
def records_query(document_id, state):
    """Build the search body for the records of one document.

    ``state`` supplies the request parameters used here: the repeated
    ``row`` values (read through ``getlist``), ``has_text``, ``filters``,
    ``limit`` and ``offset``. Explicitly requested rows are added as a
    constant-score clause with a large boost.

    Returns the dict produced by ``records_query_internal`` with filters,
    paging offset and sort order applied.
    """
    try:
        rows = [int(r) for r in state.getlist('row')]
    except (TypeError, ValueError):
        # A single malformed row number discards the whole row selection.
        rows = []

    # Relevance only matters when there is text or a row boost to score on.
    score_query = state.has_text or len(rows)
    shoulds = records_query_shoulds(state)
    if not shoulds:
        shoulds = [match_all()]

    if rows:
        shoulds.append({
            "constant_score": {
                "filter": {
                    'terms': {
                        'index': rows
                    }
                },
                "boost": 1000
            }
        })

    query = records_query_internal(document_id, shoulds, size=state.limit)
    query['query'] = filter_query(query['query'], state.filters)
    query['from'] = state.offset

    sort = [{'index': 'asc'}, {'page': 'asc'}]
    if score_query:
        sort.insert(0, '_score')
    # The sort order used to be computed and then dropped; attach it so the
    # ordering actually reaches the search backend.
    query['sort'] = sort
    return query
Пример #2
0
def entities_query(args, fields=None, facets=True):
    """Compose the search body for an entity search from request arguments.

    ``args`` is a ``MultiDict`` (or anything ``MultiDict`` can wrap) from
    which ``q`` (query text), ``facet`` (repeated facet names) and ``sort``
    (``doc_count``, ``alphabet`` or ``score``) are read. ``fields`` selects
    the ``_source`` fields and defaults to ``DEFAULT_FIELDS``. Aggregations
    for the requested facets are only built when ``facets`` is truthy.

    Returns a dict with ``sort``, ``query``, ``aggregations`` and
    ``_source`` keys; the query is not executed here.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    if not text:
        q = match_all()
    else:
        q = {
            "query_string": {
                "query": text,
                "fields": [
                    'name^15', 'name_latin^5', 'terms^12', 'terms_latin^3',
                    'summary^10', 'summary_latin^7', 'description^5',
                    'description_latin^3'
                ],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }

    q = authz_filter(q)
    filters = parse_filters(args)
    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facet_names = args.getlist('facet')
        if 'collections' in facet_names:
            # Collections get their own aggregation, so they are taken out
            # of the generic facet list.
            aggs = facet_collections(q, aggs, filters)
            facet_names.remove('collections')
        aggs = aggregate(q, aggs, facet_names)

    sort_options = {
        'doc_count': [{'doc_count': 'desc'}, '_score'],
        'alphabet': [{'name': 'asc'}, '_score'],
        'score': ['_score'],
    }
    default_sort = 'score' if text else 'doc_count'
    sort_mode = args.get('sort', '').strip().lower() or default_sort
    # An unrecognised sort mode falls back to the default instead of
    # leaving ``sort`` unbound and raising UnboundLocalError.
    sort = sort_options.get(sort_mode, sort_options[default_sort])

    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
Пример #3
0
def text_query(text):
    """Build the query clause that looks for a piece of text.

    Missing or blank text yields ``match_all()``. Otherwise at least one of
    the ``meta_query_string`` clause and the ``child_record``-wrapped
    ``text_query_string`` clause has to match.
    """
    if text is None or not text.strip():
        return match_all()

    meta_clause = meta_query_string(text)
    record_clause = child_record({
        "bool": {"should": [text_query_string(text)]}
    })
    return {
        "bool": {
            "minimum_should_match": 1,
            "should": [meta_clause, record_clause]
        }
    }
Пример #4
0
def text_query(text):
    """Return the part of a query that finds a piece of text.

    ``None`` or whitespace-only text produces ``match_all()``; any other
    text produces a ``bool`` query in which one of two alternatives must
    match: ``meta_query_string(text)`` or ``text_query_string(text)``
    wrapped by ``child_record``.
    """
    has_text = text is not None and bool(text.strip())
    if not has_text:
        return match_all()

    alternatives = [meta_query_string(text)]
    inner = {"bool": {"should": [text_query_string(text)]}}
    alternatives.append(child_record(inner))
    return {"bool": {"minimum_should_match": 1, "should": alternatives}}
Пример #5
0
def entities_query(args, fields=None, facets=True):
    """Compose the search body for an entity search from request arguments.

    ``args`` is a ``MultiDict`` (or anything ``MultiDict`` can wrap) from
    which ``q`` (query text), ``facet`` (repeated facet names) and ``sort``
    (``doc_count``, ``alphabet`` or ``score``) are read. ``fields`` selects
    the ``_source`` fields and defaults to ``DEFAULT_FIELDS``. Aggregations
    for the requested facets are only built when ``facets`` is truthy.

    Returns a dict with ``sort``, ``query``, ``aggregations`` and
    ``_source`` keys; the query is not executed here.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)
    text = args.get('q', '').strip()
    if not text:
        q = match_all()
    else:
        q = {
            "query_string": {
                "query": text,
                "fields": ['name^15', 'name_latin^5',
                           'terms^12', 'terms_latin^3',
                           'summary^10', 'summary_latin^7',
                           'description^5', 'description_latin^3'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }

    q = authz_filter(q)
    filters = parse_filters(args)
    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facet_names = args.getlist('facet')
        if 'collections' in facet_names:
            # Collections get their own aggregation, so they are taken out
            # of the generic facet list.
            aggs = facet_collections(q, aggs, filters)
            facet_names.remove('collections')
        aggs = aggregate(q, aggs, facet_names)

    sort_options = {
        'doc_count': [{'doc_count': 'desc'}, '_score'],
        'alphabet': [{'name': 'asc'}, '_score'],
        'score': ['_score'],
    }
    default_sort = 'score' if text else 'doc_count'
    sort_mode = args.get('sort', '').strip().lower() or default_sort
    # An unrecognised sort mode falls back to the default instead of
    # leaving ``sort`` unbound and raising UnboundLocalError.
    sort = sort_options.get(sort_mode, sort_options[default_sort])

    return {
        'sort': sort,
        'query': filter_query(q, filters, OR_FIELDS),
        'aggregations': aggs,
        '_source': fields or DEFAULT_FIELDS
    }
Пример #6
0
def links_query(origin, state):
    """Search the links attached to ``origin`` and return the result set.

    ``origin`` is a mapping providing either ``ids`` or a single ``id``;
    ``state`` supplies the text, authz, facets, filters, sort mode and
    paging. The query is run through ``execute_basic`` and every hit's
    ``_source`` is returned with ``id`` and ``score`` added.
    """
    if not state.has_text:
        query = match_all()
    else:
        query = {
            "query_string": {
                "query": state.text,
                "fields": ['name^5', 'names^2', 'text'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }

    # Restrict to links of the given origin(s), then to what the user may see.
    origin_ids = origin.get('ids') or [origin.get('id')]
    query = add_filter(query, {'terms': {'origin.id': origin_ids}})
    query = authz_filter(query, state.authz, roles=True)

    aggregations = aggregate(state, query,
                             {'scoped': {'global': {}, 'aggs': {}}},
                             state.facet_names)

    sort = ['_score']
    if state.sort != 'score':
        sort = [
            {'properties.start_date': 'desc'},
            {'properties.end_date': 'desc'},
        ]

    body = {
        'sort': sort,
        'query': filter_query(query, state.filters),
        'aggregations': aggregations,
        'size': state.limit,
        'from': state.offset,
        '_source': DEFAULT_FIELDS
    }

    result, hits, output = execute_basic(TYPE_LINK, body)
    output['facets'] = parse_facet_result(state, result)
    for hit in hits.get('hits', []):
        link = hit.get('_source')
        link['id'] = hit.get('_id')
        link['score'] = hit.get('_score')
        output['results'].append(link)
    return output
Пример #7
0
def tabular_query(document_id, sheet, args):
    """Build the search body for the rows of one sheet of a document.

    ``args`` provides the optional ``q`` text and repeated ``row`` numbers.
    Requested rows are added as a boosted ``should`` clause. Results are
    ordered by ``row_id``, preceded by ``_score`` when text or rows were
    given.
    """
    query = match_all()
    text = args.get('q', '').strip()
    use_score = len(text) > 0
    if use_score:
        query = text_query_string(text)

    try:
        row_ids = [int(value) for value in args.getlist('row')]
    except Exception:
        # Any unparsable row number discards the whole row selection.
        row_ids = []

    if row_ids:
        use_score = True
        row_boost = {
            "constant_score": {
                "filter": {'terms': {'row_id': row_ids}},
                "boost": 1000
            }
        }
        query = {"bool": {"must": [query], "should": [row_boost]}}

    for field, value in (('document_id', document_id), ('sheet', sheet)):
        query = add_filter(query, {'term': {field: value}})

    sort = [{'row_id': 'asc'}]
    if use_score:
        sort = ['_score'] + sort
    return {
        'from': 0,
        'size': 100,
        'query': query,
        'sort': sort,
        '_source': ['document_id', 'sheet', 'row_id', 'raw']
    }
Пример #8
0
def text_query(text):
    """Construct the part of a query responsible for finding a piece of
    text in the selected documents.

    Returns ``match_all()`` when ``text`` is ``None`` or blank. Otherwise
    returns a ``bool`` query requiring at least one of
    ``meta_query_string(text)`` and the ``child_record`` wrapper around
    ``text_query_string(text)`` to match.
    """
    if text is None:
        return match_all()
    if not text.strip():
        return match_all()

    should = [meta_query_string(text)]
    should.append(child_record({
        "bool": {"should": [text_query_string(text)]}
    }))
    return {"bool": {"minimum_should_match": 1, "should": should}}
Пример #9
0
def tabular_query(document_id, sheet, args):
    """Build the search body for the rows of one sheet of a document.

    ``args`` provides the optional ``q`` text and repeated ``row`` numbers.
    Requested rows are added as a boosted ``should`` clause. Results are
    ordered by ``row_id``, preceded by ``_score`` when text or rows were
    given.
    """
    query = match_all()
    text = args.get('q', '').strip()
    use_score = len(text) > 0
    if use_score:
        query = text_query_string(text)

    try:
        row_ids = [int(value) for value in args.getlist('row')]
    except Exception:
        # Any unparsable row number discards the whole row selection.
        row_ids = []

    if row_ids:
        use_score = True
        row_boost = {
            "constant_score": {
                "filter": {'terms': {'row_id': row_ids}},
                "boost": 1000
            }
        }
        query = {"bool": {"must": query, "should": row_boost}}

    for field, value in (('document_id', document_id), ('sheet', sheet)):
        query = add_filter(query, {'term': {field: value}})

    sort = [{'row_id': 'asc'}]
    if use_score:
        sort = ['_score'] + sort
    return {
        'from': 0,
        'size': 100,
        'query': query,
        'sort': sort,
        '_source': ['document_id', 'sheet', 'row_id', 'raw']
    }
Пример #10
0
def entities_query(state, fields=None, facets=True, doc_counts=False):
    """Compose and execute an entity search, returning the result set.

    ``state`` supplies the text, raw query, authz, facets, filters, sort
    mode and paging. ``fields`` selects the ``_source`` fields and defaults
    to ``DEFAULT_FIELDS``. Facet aggregations are only built when ``facets``
    is truthy. When ``doc_counts`` is truthy, a multi-search is issued to
    attach a ``doc_count`` to every returned entity.

    Returns the output of ``execute_basic`` with ``facets`` and ``results``
    filled in.
    """
    if state.has_text:
        q = {
            "query_string": {
                "query": state.text,
                "fields": ['name^5', 'names^2', 'text'],
                "default_operator": "AND",
                "use_dis_max": True
            }
        }
    else:
        q = match_all()

    if state.raw_query:
        q = {"bool": {"must": [q, state.raw_query]}}

    q = authz_filter(q, state.authz, roles=True)

    aggs = {'scoped': {'global': {}, 'aggs': {}}}
    if facets:
        facet_names = list(state.facet_names)
        if 'collections' in facet_names:
            # Collections get their own aggregation, so they are taken out
            # of the generic facet list.
            aggs = facet_collections(state, q, aggs)
            facet_names.remove('collections')
        aggs = aggregate(state, q, aggs, facet_names)

    if state.sort == 'doc_count':
        sort = [{'doc_count': 'desc'}, '_score']
    elif state.sort == 'score':
        sort = ['_score', {'name_sort': 'asc'}]
    else:
        sort = [{'name_sort': 'asc'}]

    q = {
        'sort': sort,
        'query': filter_query(q, state.filters),
        'aggregations': aggs,
        'size': state.limit,
        'from': state.offset,
        '_source': fields or DEFAULT_FIELDS
    }

    result, hits, output = execute_basic(TYPE_ENTITY, q)
    output['facets'] = parse_facet_result(state, result)
    sub_queries = []
    for doc in hits.get('hits', []):
        entity = doc.get('_source')
        entity['id'] = doc.get('_id')
        entity['score'] = doc.get('_score')
        entity['api_url'] = url_for('entities_api.view', id=doc.get('_id'))
        output['results'].append(entity)

        if not doc_counts:
            # Count sub-queries are only needed for the multi-search below.
            continue
        sq = {'term': {'entities.id': entity['id']}}
        sq = add_filter(
            sq, {'terms': {
                'collection_id': state.authz.collections_read
            }})
        sq = {'size': 0, 'query': sq}
        # Each multi-search entry is a header line followed by a body line.
        sub_queries.append(json.dumps({}))
        sub_queries.append(json.dumps(sq))

    if sub_queries:
        # Get the number of matching documents for each entity.
        body = '\n'.join(sub_queries)
        res = es.msearch(index=es_index, doc_type=TYPE_DOCUMENT, body=body)
        responses = res.get('responses') or []
        for entity, response in zip(output['results'], responses):
            entity['doc_count'] = response.get('hits', {}).get('total')

    return output