Пример #1
0
class QASearch(FacetedSearch):
    """Faceted search over questions that also matches on their answers."""

    doc_types = [Question]
    index = settings.ES_INDEX

    fields = ['tags', 'title', 'body']

    facets = {
        'tags': TermsFacet(field='tags', size=5),
        'months': DateHistogramFacet(
            field='creation_date', interval='month', min_doc_count=0),
    }

    def query(self, search, query):
        """Attach the full-text part of the request to ``search``.

        An empty ``query`` leaves ``search`` untouched.  Otherwise a
        question matches either on its own ``tags``/``title``/``body``
        (``tags`` boosted by 10) or through an ``answer`` child whose
        ``body`` matches; the combined query is wrapped in a
        ``function_score`` that factors in the ``rating`` field.
        """
        if not query:
            return search

        # Matches found directly on the question document.
        in_question = Q(
            'multi_match', fields=['tags^10', 'title', 'body'], query=query)

        # Inner-hit settings for matching answers: a single hit, without
        # its source, carrying a 30-character highlighted body fragment.
        answer_hits = {
            'highlight': {
                'pre_tags': ['[[['],
                'post_tags': [']]]'],
                'fields': {'body': {'fragment_size': 30}},
            },
            '_source': False,
            'size': 1,
        }
        in_answers = Q(
            'has_child',
            type='answer',
            query=Q('match', body=query),
            inner_hits=answer_hits)

        rating_factor = SF('field_value_factor', field='rating')
        return search.query(
            'function_score',
            query=in_question | in_answers,
            functions=[rating_factor])

    def highlight(self, search):
        """Return ``search`` unchanged; no highlighting is added here."""
        return search
Пример #2
0
class DocumentSearch(FacetedSearch):
    """Faceted search over ``document`` entries.

    Full-text matching runs against ``title`` and ``abstract`` using the
    boosts declared in ``fields``; the facets below aggregate on the
    listed fields.
    """

    doc_types = ["document"]
    index = settings.ES_INDEX

    # Boosted fields used by ``query``: a title match counts more than an
    # abstract match.
    fields = ['title^5', 'abstract^3']

    facets = {
        'keywords':
        TermsFacet(field='keywords.keyword', size=5),
        'languages':
        TermsFacet(field='languages.keyword', size=10),
        'education_levels':
        TermsFacet(field='education_levels.keyword', size=10),
        'communities':
        TermsFacet(field='communities.keyword', size=10),
        'year_of_available':
        DateHistogramFacet(field='year_of_available',
                           interval='month',
                           min_doc_count=0),
        'document_type':
        TermsFacet(field='document_type.keyword', size=10),
        'document_file_type':
        TermsFacet(field='document_file_type.keyword', size=10),
        'document_authors':
        TermsFacet(field='document_authors', size=10),
        'license_type':
        TermsFacet(field='license_type.keyword', size=10),
        'collections':
        TermsFacet(field='collections.keyword', size=10),
    }

    def query(self, search, query):
        """Attach the full-text part of the request to ``search``.

        Returns ``search`` unchanged when ``query`` is empty; otherwise
        adds a ``multi_match`` over ``self.fields``.
        """
        if not query:
            return search
        # The title is weighted through the ``^5`` boost in ``fields``.
        # A ``field_value_factor`` score function cannot be used for this:
        # it needs a numeric field, and ``title`` is full text.
        return search.query('multi_match', fields=self.fields, query=query)

    def highlight(self, search):
        """Return ``search`` unchanged; no highlighting is added here."""
        return search
Пример #3
0
def _parse_range_entry(entry):
    """Parse one ``<name>:<from>-<to>`` range entry.

    Either bound may be the word ``none`` (any case) for an open end.
    Returns ``(name, (from, to))``, or ``None`` when the entry does not
    have exactly one ``:`` and two bounds or a bound is not an integer.
    """
    pieces = entry.split(':')
    if len(pieces) != 2:
        return None
    name, bounds = pieces
    limits = bounds.split('-')
    if len(limits) != 2:
        return None
    parsed = []
    for limit in limits:
        if limit.lower() == 'none':
            parsed.append(None)
            continue
        try:
            parsed.append(int(limit))
        except ValueError:
            return None
    return name, tuple(parsed)


def get_configured_facets():
    """Build the facet mapping described by the ``facets`` configuration.

    The setting is a whitespace-separated list of entries:

    * ``field`` -- terms facet on ``field.keyword``
    * ``field,<date interval>`` -- date histogram (see ``DATE_INTERVALS``)
    * ``field,<integer>`` -- numeric histogram with that interval
    * ``field,name:from-to,name:from-to[,...]`` -- range facet

    Malformed entries (and malformed individual ranges) are skipped rather
    than raising.  ``DEFAULT_FACETS`` is returned when nothing is
    configured.

    Returns:
        dict mapping facet name to facet instance.
    """
    configuration = get_configuration()
    raw = getattr(configuration, 'facets', None) if configuration else None
    entries = raw.split() if raw else []
    if not entries:
        return DEFAULT_FACETS

    configured_facets = {}
    for entry in entries:
        if ',' not in entry:
            configured_facets[entry] = TermsFacet(field=entry + '.keyword')
            continue

        field, option = entry.split(',', 1)
        if ',' in option:
            candidates = (_parse_range_entry(item)
                          for item in option.split(','))
            ranges = [item for item in candidates if item is not None]
            # Only register the facet when at least one range is usable.
            if ranges:
                configured_facets[field] = RangeFacet(field=field,
                                                      ranges=ranges)
        elif option in DATE_INTERVALS:
            configured_facets[field] = DateHistogramFacet(
                field=field, interval=option)
        else:
            try:
                step = int(option)
            except ValueError:
                # Neither a known date interval nor a number: ignore it.
                continue
            configured_facets[field] = HistogramFacet(
                field=field, interval=step)
    return configured_facets
Пример #4
0
class ArticlesList(List):
    """Query-parameter declarations for listing articles.

    Each attribute declares one request parameter: filters with their
    allowed lookups, search fields, facets, suggesters, ordering and
    highlighting.
    """

    # Filter on the article id; term and comparison lookups are allowed.
    id = fields.FilteringFilterField(lookups=[
        constants.LOOKUP_FILTER_TERM,
        constants.LOOKUP_FILTER_TERMS,
        constants.LOOKUP_QUERY_GT,
        constants.LOOKUP_QUERY_GTE,
        constants.LOOKUP_QUERY_LT,
        constants.LOOKUP_QUERY_LTE,
        constants.LOOKUP_QUERY_IN,
        constants.LOOKUP_QUERY_EXCLUDE,
    ])
    ids = fields.IdsSearchField()
    # Search restricted to a single field each.
    title = fields.SearchFilterField(search_i18n_fields=['title'])

    notes = fields.SearchFilterField(search_i18n_fields=['notes'])

    # General search over title, notes and the titles of related datasets.
    q = fields.SearchFilterField(
        search_i18n_fields=['title', 'notes', 'datasets.title'],
    )

    # NOTE(review): ``translated=True`` presumably makes the filter target
    # the language-specific variant of the field - confirm against
    # FilteringFilterField.
    tags = fields.FilteringFilterField(
        lookups=[
            constants.LOOKUP_FILTER_TERM,
            constants.LOOKUP_FILTER_TERMS,
            constants.LOOKUP_FILTER_WILDCARD,
            constants.LOOKUP_FILTER_PREFIX,
            constants.LOOKUP_QUERY_IN,
            constants.LOOKUP_QUERY_EXCLUDE,
            constants.LOOKUP_QUERY_CONTAINS
        ],
        translated=True
    )

    author = fields.FilteringFilterField(lookups=[
        constants.LOOKUP_FILTER_TERM,
        constants.LOOKUP_FILTER_TERMS,
        constants.LOOKUP_FILTER_WILDCARD,
        constants.LOOKUP_FILTER_PREFIX,
        constants.LOOKUP_QUERY_IN,
        constants.LOOKUP_QUERY_EXCLUDE,
        constants.LOOKUP_QUERY_CONTAINS,
        constants.LOOKUP_QUERY_STARTSWITH,
        constants.LOOKUP_QUERY_ENDSWITH,
    ])

    slug = fields.FilteringFilterField(lookups=[
        constants.LOOKUP_FILTER_TERM,
        constants.LOOKUP_FILTER_TERMS,
        constants.LOOKUP_QUERY_IN,
        constants.LOOKUP_QUERY_EXCLUDE,
        constants.LOOKUP_QUERY_STARTSWITH,
        constants.LOOKUP_QUERY_ENDSWITH,
    ])

    # Filter on the nested ``category`` object through its ``id``.
    category = fields.NestedFilteringField('category', field_name='category.id', lookups=[
        constants.LOOKUP_FILTER_TERM,
        constants.LOOKUP_FILTER_TERMS,
        constants.LOOKUP_QUERY_GT,
        constants.LOOKUP_QUERY_GTE,
        constants.LOOKUP_QUERY_LT,
        constants.LOOKUP_QUERY_LTE,
        constants.LOOKUP_QUERY_IN
    ])

    # Available facets: tag terms and a monthly histogram of ``modified``.
    facet = fields.FacetedFilterField(
        facets={
            'tags': TermsFacet(field='tags', size=500),
            'modified': DateHistogramFacet(field='modified', interval='month', size=500)
        },
    )

    # Title suggestions served from the ``title.suggest`` field.
    title_suggest = fields.SuggesterFilterField(
        field='title.suggest',
        suggesters=[
            constants.SUGGESTER_COMPLETION,
            constants.SUGGESTER_PHRASE,
            constants.SUGGESTER_TERM
        ]
    )
    # Maps public sort keys to index fields.
    # NOTE(review): ``{lang}`` looks like a placeholder filled in by
    # OrderingFilterField with the request language - confirm.
    sort = fields.OrderingFilterField(
        default_ordering=['-modified', ],
        ordering_fields={
            "id": "id",
            "title": "title.{lang}.sort",
            "modified": "modified",
            "created": "created"
        }
    )

    # Matches in title and notes are wrapped in <em>...</em>.
    highlight = fields.HighlightBackend(
        highlight_fields={
            'title': {
                'options': {
                    'pre_tags': ['<em>'],
                    'post_tags': ['</em>'],
                },
                'enabled': True
            },
            'notes': {
                'options': {
                    'pre_tags': ['<em>'],
                    'post_tags': ['</em>'],
                },
                'enabled': True
            }
        }
    )

    class Meta:
        # NOTE(review): presumably makes the schema reject unknown
        # parameters - confirm against the List base class.
        strict = True
def _single_facet_query(name, aggregation):
    """Return the query body expected when only facet ``name`` is requested.

    The facet aggregation sits inside a ``_filter_<name>`` filter
    aggregation whose filter is ``match_all``.
    """
    return {
        'aggs': {
            '_filter_' + name: {
                'aggs': {name: aggregation},
                'filter': {'match_all': {}},
            }
        }
    }


class TestFacetedFilterField(object):
    """Checks the aggregations ``FacetedFilterField`` adds to a queryset."""

    test_field_name = "faceted_filter_field"

    @pytest.mark.parametrize(
        'facets, context, aggs_query',
        [
            # No facets configured: the query stays empty.
            (None, ['date'], {}),
            # The requested facet is not among the configured ones.
            (
                {
                    'status': TermsFacet(field='status'),
                    'date': DateHistogramFacet(field='date',
                                               interval='year'),
                    'range': RangeFacet(field='height',
                                        ranges=[("few", (None, 2)),
                                                ("lots", (2, None))]),
                },
                ['unknown'],
                {},
            ),
            # Terms facet.
            (
                {'status': TermsFacet(field='status')},
                ['status'],
                _single_facet_query('status', {
                    'terms': {'field': 'status'},
                }),
            ),
            # Date histogram facet.
            (
                {'date': DateHistogramFacet(field='date', interval='year')},
                ['date'],
                _single_facet_query('date', {
                    'date_histogram': {
                        'field': 'date',
                        'interval': 'year',
                        'min_doc_count': 0,
                    },
                }),
            ),
            # Range facet with one open-ended bucket on each side.
            (
                {
                    'range': RangeFacet(field='height',
                                        ranges=[("few", (None, 2)),
                                                ("lots", (2, None))]),
                },
                ['range'],
                _single_facet_query('range', {
                    'range': {
                        'field': 'height',
                        'keyed': False,
                        'ranges': [
                            {'key': 'few', 'to': 2},
                            {'key': 'lots', 'from': 2},
                        ],
                    },
                }),
            ),
        ])
    def test_queryset(self, facets, context, aggs_query, es_dsl_queryset):
        """The prepared queryset serialises to exactly ``aggs_query``."""
        field = FacetedFilterField(facets=facets,
                                   field_name=self.test_field_name)
        expected = dict(aggs_query)

        prepared = field.prepare_queryset(es_dsl_queryset, context)
        assert prepared.to_dict() == expected
Пример #6
0
from plone.app.contentlisting.interfaces import IContentListingObject
from Products.CMFPlone.PloneBatch import Batch
from Products.CMFPlone.utils import getToolByName
from Products.Five.browser import BrowserView

from collective.es.index.esproxyindex import SEARCH_FIELDS
from collective.es.index.esproxyindex import BATCH_SIZE
from collective.es.index.utils import get_configuration
from collective.es.index.utils import get_query_client
from collective.es.index.utils import index_name

# Default facet set: terms facets on subjects and review state plus a
# monthly histogram of the modification date.
DEFAULT_FACETS = dict(
    subjects=TermsFacet(field='subjects.keyword'),
    review_state=TermsFacet(field='review_state.keyword'),
    modified=DateHistogramFacet(field='modified', interval='month'),
)

# Interval names recognised for date histogram facets.
DATE_INTERVALS = 'month week day hour'.split()

# strftime pattern for each interval name.
# NOTE(review): the ``%-d`` / ``%-I`` (no padding) directives are a
# platform extension of strftime - confirm they work on every target OS.
DATE_FORMATS = dict(
    month='%B %Y',
    week='Week of %b %-d, %Y',
    day='%B %-d, %Y',
    hour='%b %-d %-I %p',
)


def get_configured_facets():
    facets = None
    configuration = get_configuration()
Пример #7
0
class PublicationSearch(FacetedSearch):
    """Faceted publication search in which some facets are tied together."""

    doc_types = [Publication]
    index = 'offenegesetze_publications'
    fields = ['title^3', 'content']
    # For each facet, the other facets whose filters must also be left out
    # when that facet's aggregation is computed (see ``aggregate``).
    equivalences = {
        'year': {'date'},
        'date': {'year'},
    }

    facets = {
        'kind': TermsFacet(field='kind'),
        'year': NumberRangeFacet(field='year'),
        'page': NumberRangeFacet(field='page'),
        'number': NumberRangeFacet(field='number'),
        'date': DateHistogramFacet(field='date', interval='year'),
    }

    def __getitem__(self, n):
        """Apply the slice ``n`` to the wrapped search and return ``self``."""
        assert isinstance(n, slice)
        self._s = self._s[n]
        return self

    def aggregate(self, search):
        """Add one filtered aggregation per facet, respecting equivalences.

        Every facet is aggregated under ``_filter_<name>`` with all active
        filters applied except its own and those of its equivalent facets.
        """
        for name, facet in self.facets.items():
            aggregation = facet.get_aggregation()
            linked = self.equivalences.get(name, set())

            combined = Q('match_all')
            for field, active_filter in self._filters.items():
                if field != name and field not in linked:
                    combined &= active_filter

            wrapper = search.aggs.bucket(
                '_filter_' + name, 'filter', filter=combined)
            wrapper.bucket(name, aggregation)

    def add_sort(self, *sort_args):
        """Remember ``sort_args`` and apply them to the wrapped search."""
        self._sort = sort_args
        self._s = self._s.sort(*sort_args)

    def add_pagination_filter(self, filter_kwargs):
        """Restrict the wrapped search with a ``range`` filter."""
        self._s = self._s.filter('range', **filter_kwargs)

    def query(self, search, query):
        """Add the query part to ``search``.

        An empty ``query`` returns ``search`` unchanged; otherwise a
        lenient ``simple_query_string`` over ``self.fields`` is added with
        ``and`` as the default operator.
        """
        if not query:
            return search
        return search.query(
            "simple_query_string",
            query=query,
            fields=self.fields,
            default_operator='and',
            lenient=True,
        )
Пример #8
0
class ArticlesList(List):
    """Query-parameter declarations for listing articles.

    Each attribute declares one request parameter: filters with their
    allowed lookups, the search field, facets, suggesters, ordering and
    highlighting.
    """

    # Filter on the article id; term and comparison lookups are allowed.
    id = fields.FilteringFilterField(lookups=[
        constants.LOOKUP_FILTER_TERM, constants.LOOKUP_FILTER_TERMS, constants.
        LOOKUP_QUERY_GT, constants.LOOKUP_QUERY_GTE, constants.LOOKUP_QUERY_LT,
        constants.LOOKUP_QUERY_LTE, constants.LOOKUP_QUERY_IN
    ])
    ids = fields.IdsSearchField()
    # General search over the article's text fields and the titles of
    # related datasets.
    q = fields.SearchFilterField(
        search_fields=['title', 'notes', 'author', 'tags', 'datasets.title'], )

    tags = fields.FilteringFilterField(lookups=[
        constants.LOOKUP_FILTER_TERM, constants.LOOKUP_FILTER_TERMS,
        constants.LOOKUP_FILTER_WILDCARD, constants.LOOKUP_FILTER_PREFIX,
        constants.LOOKUP_QUERY_IN, constants.LOOKUP_QUERY_EXCLUDE
    ])

    author = fields.FilteringFilterField(lookups=[
        constants.LOOKUP_FILTER_TERM, constants.LOOKUP_FILTER_TERMS,
        constants.LOOKUP_FILTER_WILDCARD, constants.LOOKUP_FILTER_PREFIX,
        constants.LOOKUP_QUERY_IN, constants.LOOKUP_QUERY_EXCLUDE
    ])

    # Available facets: tag terms and a monthly histogram of ``modified``.
    facet = fields.FacetedFilterField(facets={
        'tags':
        TermsFacet(field='tags', size=500),
        'modified':
        DateHistogramFacet(field='modified', interval='month', size=500)
    }, )

    # Title suggestions served from the ``title.suggest`` field.
    title_suggest = fields.SuggesterFilterField(
        field='title.suggest',
        suggesters=[
            constants.SUGGESTER_COMPLETION, constants.SUGGESTER_PHRASE,
            constants.SUGGESTER_TERM
        ])
    # Maps public sort keys to index fields; titles sort on ``title.raw``.
    sort = fields.OrderingFilterField(default_ordering=[
        '-modified',
    ],
                                      ordering_fields={
                                          "id": "id",
                                          "title": "title.raw",
                                          "modified": "modified",
                                          "created": "created"
                                      })

    # Matches in title and notes are wrapped in <em>...</em>.
    highlight = fields.HighlightBackend(
        highlight_fields={
            'title': {
                'options': {
                    'pre_tags': ['<em>'],
                    'post_tags': ['</em>'],
                },
                'enabled': True
            },
            'notes': {
                'options': {
                    'pre_tags': ['<em>'],
                    'post_tags': ['</em>'],
                },
                'enabled': True
            }
        })

    class Meta:
        # NOTE(review): presumably makes the schema reject unknown
        # parameters - confirm against the List base class.
        strict = True
Пример #9
0
        def _data(self, request, cleaned, *args, explain=None, **kwargs):
            """Gather aggregation statistics with one multi-search request.

            Three zero-size searches are sent together: one over the common
            alias (documents per model, split by creation month), one over
            published datasets and one over published resources.  The
            dataset and resource aggregations are then copied onto the
            first response, which is returned.

            Args:
                request, cleaned, *args, **kwargs: not used here.
                explain: when equal to ``'1'`` the multi-search body is
                    returned as a dict instead of being executed.

            Raises:
                falcon.HTTPBadRequest: when Elasticsearch reports a
                    transport error.
            """
            search = Search(using=connection,
                            index=settings.ELASTICSEARCH_COMMON_ALIAS_NAME,
                            extra={'size': 0})
            search.aggs.bucket(
                'documents_by_type',
                TermsFacet(field='model').get_aggregation()).bucket(
                    'by_month',
                    DateHistogramFacet(field='created',
                                       interval='month',
                                       min_doc_count=0).get_aggregation())
            d_search = DatasetDocument().search().extra(size=0).filter(
                'match', status='published')
            r_search = ResourceDocument().search().extra(size=0).filter(
                'match', status='published')

            d_search.aggs.bucket(
                'datasets_by_institution',
                NestedFacet(
                    'institution',
                    TermsFacet(field='institution.id')).get_aggregation())

            d_search.aggs.bucket(
                'datasets_by_categories',
                NestedFacet(
                    'categories',
                    TermsFacet(field='categories.id', min_doc_count=1,
                               size=50)).get_aggregation())
            d_search.aggs.bucket(
                'datasets_by_category',
                NestedFacet(
                    'category',
                    TermsFacet(field='category.id', min_doc_count=1,
                               size=50)).get_aggregation())

            d_search.aggs.bucket('datasets_by_tag',
                                 TermsFacet(field='tags').get_aggregation())

            # Keyword counts limited to keywords in the active language.
            d_search.aggs.bucket(
                'datasets_by_keyword',
                Nested(aggs={
                    'inner':
                    Filter(
                        aggs={'inner': Terms(field='keywords.name')},
                        term={'keywords.language': get_language()},
                    )
                },
                       path='keywords'))

            d_search.aggs.bucket('datasets_by_formats',
                                 TermsFacet(field='formats').get_aggregation())
            d_search.aggs.bucket(
                'datasets_by_openness_scores',
                TermsFacet(field='openness_scores').get_aggregation())
            r_search.aggs.bucket('resources_by_type',
                                 TermsFacet(field='type').get_aggregation())

            m_search = MultiSearch().add(search).add(d_search).add(r_search)
            if explain == '1':
                return m_search.to_dict()

            try:
                resp1, resp2, resp3 = m_search.execute()
            except TransportError as err:
                try:
                    description = err.info['error']['reason']
                except (KeyError, TypeError):
                    # ``info`` is not always a nested dict (it may be a
                    # plain string or missing); fall back to the short
                    # error text instead of failing inside the handler.
                    description = err.error
                raise falcon.HTTPBadRequest(description=description)

            # Fold the dataset and resource aggregations into the first
            # response so callers receive a single object.
            dataset_aggregations = (
                'datasets_by_institution',
                'datasets_by_categories',
                'datasets_by_category',
                'datasets_by_tag',
                'datasets_by_keyword',
                'datasets_by_formats',
                'datasets_by_openness_scores',
            )
            for name in dataset_aggregations:
                setattr(resp1.aggregations, name,
                        getattr(resp2.aggregations, name))
            resp1.aggregations.resources_by_type = (
                resp3.aggregations.resources_by_type)
            return resp1
Пример #10
0
        msg = 'Search Engine unknown error: {}'.format(e)
        output['error'] = msg
    return output


# Named facet definitions over fields of ``metadata_json``: terms facets
# for the descriptive fields and monthly histograms for the two date bounds.
all_facets = dict(
    subjects=TermsFacet(field='metadata_json.subjects.subject.raw'),
    creators=TermsFacet(field='metadata_json.creators.creatorName.raw'),
    publicationYear=TermsFacet(field='metadata_json.publicationYear'),
    publisher=TermsFacet(field='metadata_json.publisher.raw'),
    collectedStartDate=DateHistogramFacet(
        field='metadata_json.dates.date.gte', interval="month"),
    collectedEndDate=DateHistogramFacet(
        field='metadata_json.dates.date.lte', interval="month"),
)


class MetadataSearch(FacetedSearch):

    doc_types = [
        Metadata,
    ]
    date_query = {
        'simple_query_string': {
            'fields': ['metadata_json.dates.dateType'],
            'query': 'Collected'
        }