Пример #1
0
def get_n_latest_records(n_latest, field="last_updated", index=None):
    """ Fetch the newest ``n_latest`` records from the index.

    The request asks for at most ``n_latest`` hits, ordered by ``field``
    in descending order, and is restricted to the ``CFG_PUB_TYPE`` doc type.

    :param n_latest: number of hits to request (sent as the query "size")
    :param field: [string] name of the field to sort on, newest first
    :param index: name of the index; forwarded unchanged to ``es.search``

    :return: the ``["hits"]["hits"]`` entry of the search response
    """
    body = {
        "size": n_latest,
        # Called without arguments, i.e. no user-supplied search terms.
        "query": QueryBuilder.generate_query_string(),
        "sort": [{field: {"order": "desc"}}],
    }

    response = es.search(index=index, doc_type=CFG_PUB_TYPE, body=body)
    return response["hits"]["hits"]
Пример #2
0
def get_n_latest_records(n_latest, field="last_updated", index=None):
    """ Return the most recent records stored in the index.

    Builds a request limited to ``n_latest`` hits and sorted on ``field``
    in descending order, then runs it against the ``CFG_PUB_TYPE`` doc type.

    :param n_latest: upper bound on the number of hits (the query "size")
    :param field: [string] field used for the descending sort
    :param index: index name handed to ``es.search`` as-is (may be None)

    :return: the inner ``hits`` list of the search response
    """
    # Assemble the request body one clause at a time.
    search_body = {"size": n_latest}
    search_body["query"] = QueryBuilder.generate_query_string()
    search_body["sort"] = [{field: {"order": "desc"}}]

    response = es.search(body=search_body,
                         index=index,
                         doc_type=CFG_PUB_TYPE)

    hits_envelope = response['hits']
    return hits_envelope['hits']
Пример #3
0
def search(query,
           index=None,
           filters=None,
           size=10,
           include="*",
           exclude="authors",
           offset=0,
           sort_field=None,
           sort_order='',
           post_filter=None):
    """ Perform a search query.

    Two searches are executed: first over publication documents
    (``CFG_PUB_TYPE``), then over data documents (``CFG_DATA_TYPE``) whose
    parent is one of the publications that were hit. Both results are
    merged and mapped before being returned.

    :param query: [string] query string e.g. 'higgs boson'
    :param index: [string] name of the index. If None a default is used
    :param filters: [list of tuples] list of filters for the query.
                    Currently supported: ('author', author_fullname),
                    ('collaboration', collaboration_name), ('date', date).
                    If None (the default) no filters are applied
    :param size: [int] max number of hits that should be returned
    :param include: passed to ``QueryBuilder.add_source_filter`` as the
                    include argument (presumably a ``_source`` pattern --
                    confirm against QueryBuilder)
    :param exclude: passed to ``QueryBuilder.add_source_filter`` as the
                    exclude argument
    :param offset: [int] offset for the results (used for pagination)
    :param sort_field: [string] sorting field. Currently supported fields:
                    "title", "collaboration", "date", "relevance".
                    Defaults to 'date' when the query string is empty
    :param sort_order: [string] order of the sorting either original
                    (for a particular field) or reversed. Supported:
                    '' or 'rev'
    :param post_filter: passed to ``QueryBuilder.add_post_filter``

    :return: [dict] dictionary with processed results and facets
    """
    # Use a fresh list per call: a mutable default object would be shared
    # between all calls and could leak state from one search to the next.
    if filters is None:
        filters = []

    # If empty query then sort by date
    if query == '' and not sort_field:
        sort_field = 'date'

    query = HEPDataQueryParser.parse_query(query)

    # Build core query
    data_query = QueryBuilder.generate_query_string(query)
    pub_query = QueryBuilder.generate_query_string(query)
    authors_query = QueryBuilder.generate_nested_query('authors', query)

    # Publication search: match on the publication itself, its authors,
    # or any of its child data documents.
    query_builder = QueryBuilder()
    query_builder.add_child_parent_relation(
        CFG_DATA_TYPE,
        relation="child",
        related_query=data_query,
        other_queries=[pub_query, authors_query])

    # Add additional options
    query_builder.add_pagination(size=size, offset=offset)
    query_builder.add_sorting(sort_field=sort_field, sort_order=sort_order)
    query_builder.add_filters(filters)
    query_builder.add_post_filter(post_filter)
    query_builder.add_aggregations()
    query_builder.add_source_filter(include, exclude)

    if query:
        # Randomize search among the available shard copies.
        pub_result = es.search(index=index,
                               body=query_builder.query,
                               doc_type=CFG_PUB_TYPE)
    else:
        # Execute search only on the primary shards (to ensure no missing or duplicate results).
        pub_result = es.search(index=index,
                               body=query_builder.query,
                               doc_type=CFG_PUB_TYPE,
                               preference="_primary")

    # Restrict the second search to children of the publications found above.
    # NOTE(review): the "filtered" query is legacy Elasticsearch DSL that was
    # removed in later major versions -- confirm the targeted ES version.
    parent_filter = {
        "filtered": {
            "filter": {
                "terms": {
                    "_id": [hit["_id"] for hit in pub_result['hits']['hits']]
                }
            }
        }
    }

    query_builder = QueryBuilder()
    query_builder.add_child_parent_relation(CFG_PUB_TYPE,
                                            relation="parent",
                                            related_query=parent_filter,
                                            must=True,
                                            other_queries=[data_query])
    # NOTE(review): the 50x multiplier presumably leaves room for several
    # data documents per publication hit -- confirm the intended bound.
    query_builder.add_pagination(size=size * 50)

    data_result = es.search(index=index,
                            body=query_builder.query,
                            doc_type=CFG_DATA_TYPE)

    merged_results = merge_results(pub_result, data_result)

    return map_result(merged_results)