Example #1
    def build(self, q=None, **options):
        """
        Build a query according to q and options.
        This is the public method called by API handlers.

        Regarding scopes:
            scopes: a non-empty list of str fields builds a match query.
            scopes: None or [] means no scope, so a query string query is built.

        Additionally, these options are supported:
            explain: include ES scoring information
            userquery: a customized function to interpret q

        * Additional keywords are passed through as ES keywords,
            for example: 'explain', 'version' ...

        * Multi-search is supported when q is a list. All queries
            are built individually and then sent in one request.

        """
        options = dotdict(options)

        if options.scroll_id:
            # bypass all query building stages
            return ESScrollID(options.scroll_id)

        if options.fetch_all:
            # clean up conflicting parameters
            options.pop('sort', None)
            options.pop('size', None)

        try:
            # process a single q vs a list of q's;
            # dispatch 'val' vs 'key:val' to the corresponding builders.

            if isinstance(q, list):
                search = MultiSearch()
                for _q in q:
                    _search = self._build_one(_q, options)
                    search = search.add(_search)
            else:  # str, int ...
                search = self._build_one(q, options)

        except IllegalOperation as exc:
            raise ValueError(str(exc)) from exc  # e.g. sorting by -_score

        if options.get('rawquery'):
            raise RawQueryInterrupt(search.to_dict())

        return search
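
For reference, a minimal standalone sketch of the multi-search path the docstring describes: when q is a list, each query is built individually and all of them are sent in one _msearch request. The index name and query terms below are illustrative assumptions, not taken from the example.

# a usage sketch, assuming an 'es-index' index and sample query terms
from elasticsearch_dsl import MultiSearch, Search

ms = MultiSearch(index='es-index')  # hypothetical index name
for term in ('cdk2', 'tp53'):       # sample terms, not from the source
    # with no scopes, each term becomes a query string query
    ms = ms.add(Search().query('query_string', query=term))

# one request over the wire; one response per added search, in add order
for resp in ms.execute():
    print(resp.hits.total)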
Example #2
        def _data(self, request, cleaned, *args, explain=None, **kwargs):
            m_search = MultiSearch()
            search = Search(using=connection,
                            index=settings.ELASTICSEARCH_COMMON_ALIAS_NAME,
                            extra={'size': 0})
            search.aggs.bucket(
                'documents_by_type',
                TermsFacet(field='model').get_aggregation()).bucket(
                    'by_month',
                    DateHistogramFacet(field='created',
                                       interval='month',
                                       min_doc_count=0).get_aggregation())
            d_search = DatasetDocument().search().extra(size=0).filter(
                'match', status='published')
            r_search = ResourceDocument().search().extra(size=0).filter(
                'match', status='published')

            d_search.aggs.bucket(
                'datasets_by_institution',
                NestedFacet(
                    'institution',
                    TermsFacet(field='institution.id')).get_aggregation())

            d_search.aggs.bucket(
                'datasets_by_categories',
                NestedFacet(
                    'categories',
                    TermsFacet(field='categories.id', min_doc_count=1,
                               size=50)).get_aggregation())
            d_search.aggs.bucket(
                'datasets_by_category',
                NestedFacet(
                    'category',
                    TermsFacet(field='category.id', min_doc_count=1,
                               size=50)).get_aggregation())

            d_search.aggs.bucket('datasets_by_tag',
                                 TermsFacet(field='tags').get_aggregation())

            d_search.aggs.bucket(
                'datasets_by_keyword',
                Nested(aggs={
                    'inner':
                    Filter(
                        aggs={'inner': Terms(field='keywords.name')},
                        term={'keywords.language': get_language()},
                    )
                },
                       path='keywords'))

            d_search.aggs.bucket('datasets_by_formats',
                                 TermsFacet(field='formats').get_aggregation())
            d_search.aggs.bucket(
                'datasets_by_openness_scores',
                TermsFacet(field='openness_scores').get_aggregation())
            r_search.aggs.bucket('resources_by_type',
                                 TermsFacet(field='type').get_aggregation())
            m_search = m_search.add(search)
            m_search = m_search.add(d_search)
            m_search = m_search.add(r_search)
            if explain == '1':
                return m_search.to_dict()
            try:
                # responses come back in the order the searches were added
                resp1, resp2, resp3 = m_search.execute()
                # TODO: is there a more elegant way to merge the responses?
                # copy dataset/resource aggregations onto the common response
                for agg in ('datasets_by_institution', 'datasets_by_categories',
                            'datasets_by_category', 'datasets_by_tag',
                            'datasets_by_keyword', 'datasets_by_formats',
                            'datasets_by_openness_scores'):
                    setattr(resp1.aggregations, agg,
                            getattr(resp2.aggregations, agg))
                resp1.aggregations.resources_by_type = \
                    resp3.aggregations.resources_by_type
                return resp1
            except TransportError as err:
                try:
                    description = err.info['error']['reason']
                except (KeyError, TypeError):  # err.info may be missing or None
                    description = err.error
                raise falcon.HTTPBadRequest(description=description)
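
A note on the bucket-building pattern above: the facet helpers from elasticsearch_dsl.faceted_search (TermsFacet, DateHistogramFacet, NestedFacet) each expose get_aggregation(), which returns a plain aggregation object that search.aggs.bucket() accepts directly. Below is a minimal sketch of that pattern in isolation; the index and field names are assumptions.

# a sketch, assuming a 'datasets' index with a nested 'institution' field
from elasticsearch_dsl import Search
from elasticsearch_dsl.faceted_search import NestedFacet, TermsFacet

s = Search(index='datasets').extra(size=0)  # size=0: aggregations only
s.aggs.bucket(                              # .aggs mutates in place
    'by_institution',
    NestedFacet('institution',
                TermsFacet(field='institution.id')).get_aggregation())
print(s.to_dict())  # inspect the generated aggregation body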