def build(self, q=None, **options):
    """
    Build a query according to q and options.
    This is the public method called by API handlers.

    Regarding scopes:
        scopes: [str] nonempty, match query.
        scopes: NoneType, or [], no scope, so query string query.

    Additionally support these options:
        explain: include es scoring information
        userquery: customized function to interpret q

    * additional keywords are passed through as es keywords
      for example: 'explain', 'version' ...
    * multi-search is supported when q is a list. all queries
      are built individually and then sent in one request.
    """
    options = dotdict(options)

    if options.scroll_id:
        # bypass all query building stages
        return ESScrollID(options.scroll_id)

    if options.fetch_all:
        # clean up conflicting parameters
        options.pop('sort', None)
        options.pop('size', None)

    try:
        # process single q vs list of q(s).
        # dispatch 'val' vs 'key:val' to corresponding functions.
        if isinstance(q, list):
            search = MultiSearch()
            for _q in q:
                _search = self._build_one(_q, options)
                search = search.add(_search)
        else:  # str, int ...
            search = self._build_one(q, options)

    except IllegalOperation as exc:
        raise ValueError(str(exc))  # ex. sorting by -_score

    if options.get('rawquery'):
        raise RawQueryInterrupt(search.to_dict())

    return search
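
# --- Usage sketch (illustrative; not part of the module above) ---
# A minimal sketch of how an API handler might call build(), assuming
# `builder` is an instance of the query-builder class defining build()
# and that RawQueryInterrupt is importable from the same module. The
# name `builder` and the example query strings are hypothetical.

def example_build_usage(builder):
    # A single q yields one elasticsearch-dsl Search object.
    search = builder.build('cdk2', size=10, explain=True)

    # A list of q's yields a MultiSearch: each query is built
    # individually, then all are sent in one request on execute().
    msearch = builder.build(['cdk2', 'ccnb1'], size=10)

    # rawquery=True short-circuits by raising RawQueryInterrupt,
    # which carries the assembled query body (a dict) for inspection.
    try:
        builder.build('cdk2', rawquery=True)
    except RawQueryInterrupt as interrupt:
        query_body = interrupt.args[0]

    return search, msearch, query_body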
def _data(self, request, cleaned, *args, explain=None, **kwargs):
    m_search = MultiSearch()

    search = Search(
        using=connection,
        index=settings.ELASTICSEARCH_COMMON_ALIAS_NAME,
        extra={'size': 0},
    )
    search.aggs.bucket(
        'documents_by_type',
        TermsFacet(field='model').get_aggregation()
    ).bucket(
        'by_month',
        DateHistogramFacet(
            field='created', interval='month', min_doc_count=0).get_aggregation()
    )

    d_search = DatasetDocument().search().extra(size=0).filter('match', status='published')
    r_search = ResourceDocument().search().extra(size=0).filter('match', status='published')

    d_search.aggs.bucket(
        'datasets_by_institution',
        NestedFacet('institution', TermsFacet(field='institution.id')).get_aggregation())
    d_search.aggs.bucket(
        'datasets_by_categories',
        NestedFacet(
            'categories',
            TermsFacet(field='categories.id', min_doc_count=1, size=50)).get_aggregation())
    d_search.aggs.bucket(
        'datasets_by_category',
        NestedFacet(
            'category',
            TermsFacet(field='category.id', min_doc_count=1, size=50)).get_aggregation())
    d_search.aggs.bucket('datasets_by_tag', TermsFacet(field='tags').get_aggregation())
    d_search.aggs.bucket(
        'datasets_by_keyword',
        Nested(
            aggs={
                'inner': Filter(
                    aggs={'inner': Terms(field='keywords.name')},
                    term={'keywords.language': get_language()},
                )
            },
            path='keywords',
        ))
    d_search.aggs.bucket('datasets_by_formats', TermsFacet(field='formats').get_aggregation())
    d_search.aggs.bucket(
        'datasets_by_openness_scores',
        TermsFacet(field='openness_scores').get_aggregation())

    r_search.aggs.bucket('resources_by_type', TermsFacet(field='type').get_aggregation())

    m_search = m_search.add(search)
    m_search = m_search.add(d_search)
    m_search = m_search.add(r_search)

    if explain == '1':
        return m_search.to_dict()

    try:
        resp1, resp2, resp3 = m_search.execute()
        # TODO: how to concatenate two responses in a more elegant way?
        resp1.aggregations.datasets_by_institution = resp2.aggregations.datasets_by_institution
        resp1.aggregations.datasets_by_categories = resp2.aggregations.datasets_by_categories
        resp1.aggregations.datasets_by_category = resp2.aggregations.datasets_by_category
        resp1.aggregations.datasets_by_tag = resp2.aggregations.datasets_by_tag
        resp1.aggregations.datasets_by_keyword = resp2.aggregations.datasets_by_keyword
        resp1.aggregations.datasets_by_formats = resp2.aggregations.datasets_by_formats
        resp1.aggregations.datasets_by_openness_scores = resp2.aggregations.datasets_by_openness_scores
        resp1.aggregations.resources_by_type = resp3.aggregations.resources_by_type
        return resp1
    except TransportError as err:
        try:
            description = err.info['error']['reason']
        except (KeyError, TypeError):
            # err.info may be a plain string, or lack the nested keys,
            # depending on what the server returned.
            description = err.error
        raise falcon.HTTPBadRequest(description=description)
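
# --- Sketch: a generic aggregation merge (illustrative) ---
# The TODO in _data() asks for a less repetitive way to copy the
# aggregations from the dataset/resource responses onto the primary
# response. A minimal sketch, assuming elasticsearch-dsl response
# objects whose `aggregations` attribute supports to_dict() and
# attribute assignment (which the manual copies above already rely on).
# The helper name `merge_aggregations` is hypothetical.

def merge_aggregations(target, *sources):
    """Copy every named aggregation from each source response onto target."""
    for source in sources:
        for name in source.aggregations.to_dict():
            setattr(target.aggregations, name, getattr(source.aggregations, name))
    return target

# The try-block above could then shrink to:
#   resp1, resp2, resp3 = m_search.execute()
#   return merge_aggregations(resp1, resp2, resp3)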