示例#1
0
    def _get_text_queries(self, *, query, fields):
        """
        Returns a list of query objects according to the query.

        SimpleQueryString provides a syntax to let advanced users manipulate
        the results explicitly.

        We need to search for both "and" and "or" operators.
        The score of "and" should be higher as it satisfies both "or" and "and".

        For valid options, see:

        - https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html  # noqa
        """
        queries = []
        is_advanced_query = self.use_advanced_query or self._is_advanced_query(
            query)
        for operator in self.operators:
            if is_advanced_query:
                query_string = SimpleQueryString(
                    query=query,
                    fields=fields,
                    default_operator=operator,
                )
            else:
                query_string = self._get_fuzzy_query(
                    query=query,
                    fields=fields,
                    operator=operator,
                )
            queries.append(query_string)
        return queries
示例#2
0
    def query(self, search, query):
        """
        Attach the text query to ``search`` and return the new search.

        Besides adding the query this also:

        * Uses ``SimpleQueryString`` queries instead of the default query.
        * Sets the highlight encoder to ``html`` (to avoid XSS issues in the
          highlighted results) with three fragments.
        * Excludes the ``content`` and ``headers`` fields from the source.
        """
        search = search.highlight_options(
            encoder='html',
            number_of_fragments=3,
        ).source(exclude=['content', 'headers'])

        # One query per operator: both 'and' and 'or' matching are searched,
        # and the 'and' match should score higher as it satisfies both.
        per_operator = [
            SimpleQueryString(
                query=query,
                fields=self.fields,
                default_operator=operator,
            )
            for operator in self.operators
        ]

        # ``should`` returns results where either of the queries matches.
        return search.query(Bool(should=per_operator))
    def _get_text_query(self, *, query, fields, operator):
        """
        Build the text query object to use for ``query``.

        - MultiMatch: used for simple queries; gives more control over the
          results (like fuzziness) to provide a better experience.
        - SimpleQueryString: used when ``self.use_advanced_query`` is set or
          ``self._is_advanced_query`` reports the query as advanced; its
          syntax lets advanced users manipulate the results explicitly.

        For valid options, see:

        - https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
        - https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html  # noqa

        :param query: the user-supplied query text.
        :param fields: the fields to search in.
        :param operator: the operator passed on to the query object.
        :returns: a ``SimpleQueryString`` or ``MultiMatch`` query.
        """
        wants_advanced = (
            self.use_advanced_query or self._is_advanced_query(query)
        )
        if not wants_advanced:
            return MultiMatch(
                query=query,
                fields=fields,
                operator=operator,
                fuzziness="AUTO:4,6",
                prefix_length=1,
            )

        return SimpleQueryString(
            query=query,
            fields=fields,
            default_operator=operator,
        )
示例#4
0
 def __call__(self, search, params):
     if "any" not in params:
         return search
     qs = " ".join(popall(params, "any"))
     return search.query(
         SimpleQueryString(
             query=qs,
             fields=["quote", "tags", "text", "uri.parts"],
             default_operator="and",
         ))
示例#5
0
    def _get_query(qterm):
        """
        Build the query for a search term (used in search and search_day).

        :param qterm: ``str`` string to build the query for
        :return: ``Query`` a ``DisMax`` combining the message query with a
            ``Common`` query on ``msg``
        """
        if helpers.is_simple_query_string_query(qterm):
            # The helper accepted the term as a simple-query-string query:
            # search all three fields directly, with a higher boost.
            msg_query = SimpleQueryString(
                query=qterm,
                fields=['msg', 'username', 'channel'],
                default_operator='AND',
                boost=5,
            )
        else:
            # Otherwise search username/channel with the query string and
            # the message text as a phrase.
            name_query = SimpleQueryString(
                query=qterm,
                fields=['username', 'channel'],
                default_operator='AND',
                boost=1,
            )
            phrase_query = MatchPhrase(msg={'query': qterm, 'boost': 1})
            msg_query = DisMax(
                tie_breaker=0.7,
                boost=1,
                queries=[name_query, phrase_query],
            )

        common_query = Common(
            msg={'query': qterm, 'cutoff_frequency': 0.001},
        )
        return DisMax(
            tie_breaker=0.7,
            boost=1,
            queries=[msg_query, common_query],
        )
示例#6
0
    def generate_nested_query(self, query, path, fields, inner_hits):
        """
        Generate a nested query from the passed parameters.

        One ``SimpleQueryString`` is built per operator in ``self.operators``;
        they are combined in a ``Bool(should=...)`` and wrapped in a
        ``Nested`` query for ``path`` carrying ``inner_hits``.
        """
        per_operator = [
            SimpleQueryString(
                query=query,
                fields=fields,
                default_operator=operator,
            )
            for operator in self.operators
        ]

        return Nested(
            path=path,
            inner_hits=inner_hits,
            query=Bool(should=per_operator),
        )
示例#7
0
    def get_es_query(cls, query):
        """
        Return the Elasticsearch query generated from the query string.

        The query string is searched in ``cls.search_fields`` once with 'AND'
        and once with 'OR' as the default operator; both queries are combined
        in a ``Bool(should=...)``.
        """
        # Search for both 'AND' and 'OR' operations; the score of AND should
        # be higher as it satisfies both OR and AND.
        per_operator = [
            SimpleQueryString(
                query=query,
                fields=cls.search_fields,
                default_operator=operator,
            )
            for operator in ('AND', 'OR')
        ]

        # ``should`` returns results where either of the queries matches.
        return Bool(should=per_operator)
示例#8
0
    def query(self, search, query):
        """
        Manipulate the query to support nested queries.

        Combines, in a single ``Bool(should=...)``:

        * one ``SimpleQueryString`` per operator over ``self.fields``,
        * a nested query on ``sections`` over ``self._section_fields``,
        * a nested query on ``domains`` over ``self._domain_fields``.

        The common highlight options are applied to the search itself and to
        the inner hits of both nested queries.
        """
        def inner_hits_for(*field_names):
            # Inner-hit highlighting: the common options plus the given fields.
            return {
                'highlight': dict(
                    self._common_highlight_options,
                    fields={name: {} for name in field_names},
                ),
            }

        search = search.highlight_options(**self._common_highlight_options)

        # Plain (non-nested) queries over the top-level fields.
        should_clauses = [
            SimpleQueryString(
                query=query,
                fields=self.fields,
                default_operator=operator,
            )
            for operator in self.operators
        ]

        # Nested query for searching in sections.
        should_clauses.append(
            self.generate_nested_query(
                query=query,
                path='sections',
                fields=self._section_fields,
                inner_hits=inner_hits_for(
                    'sections.title',
                    'sections.content',
                ),
            )
        )

        # Nested query for searching in domains.
        should_clauses.append(
            self.generate_nested_query(
                query=query,
                path='domains',
                fields=self._domain_fields,
                inner_hits=inner_hits_for(
                    'domains.name',
                    'domains.docstrings',
                ),
            )
        )

        return search.query(Bool(should=should_clauses))
示例#9
0
    def query(self, search, query):
        """
        Add custom ``SimpleQueryString`` queries on top of the parent query.

        The parent class's ``query`` is applied first; then the query string
        is searched in ``self.fields`` with both 'AND' and 'OR' as the default
        operator, combined in a ``Bool(should=...)``.
        """
        base_search = super().query(search, query)

        # Search for both 'AND' and 'OR' operations; the score of AND should
        # be higher as it satisfies both OR and AND.
        per_operator = [
            SimpleQueryString(
                query=query,
                fields=self.fields,
                default_operator=operator,
            )
            for operator in ('AND', 'OR')
        ]

        # ``should`` returns results where either of the queries matches.
        return base_search.query(Bool(should=per_operator))
示例#10
0
    def _get_text_query(self, *, query, fields, operator):
        """
        Build the text query object to use for ``query``.

        - SimpleQueryString: used when ``self.use_advanced_query`` is set or
          ``self._is_advanced_query`` reports the query as advanced; its
          syntax lets advanced users manipulate the results explicitly.
        - MultiMatch: used otherwise; gives more control over the results
          (like fuzziness) to provide a better experience for simple queries.

        :param query: the user-supplied query text.
        :param fields: the fields to search in.
        :param operator: the operator passed on to the query object.
        :returns: a ``SimpleQueryString`` or ``MultiMatch`` query.
        """
        if self.use_advanced_query or self._is_advanced_query(query):
            return SimpleQueryString(
                query=query,
                fields=fields,
                default_operator=operator,
            )

        return MultiMatch(
            query=query,
            fields=fields,
            operator=operator,
            fuzziness="AUTO",
        )
示例#11
0
文件: main.py 项目: ehfeng/rubberband
def site(slug):
    """
    Render the search results for ``q``, or the full page listing without it.

    ``slug`` is accepted but not read by this function.

    GET args:
        q (Optional[str]): Query string. When present and non-empty, the
            pages matching it are rendered with ``search.html``; otherwise
            all pages are rendered with ``site.html``.

    NOTE(review): ``sort``, ``order`` and custom attribute search were listed
    as GET args, but nothing here reads them -- confirm whether they are
    still planned.
    """
    q = request.args.get('q')
    if q:
        from elasticsearch_dsl.query import SimpleQueryString
        pages = Page.search().query(SimpleQueryString(query=q)).execute()
        return render_template('search.html', pages=pages)

    # The code that used to follow this point (building ``pages_dict`` and
    # checking the Referer header) could never run because both branches
    # return, so it has been removed.
    pages = Page.search().execute()
    return render_template('site.html', pages=pages)
示例#12
0
    def add_query_fields(self, s, qterm, **kwargs):
        """Add the mail text query, filters, index boosts and highlighting.

            :param s:
                DSL-Query to modify
            :type s: ``DslSearch`` Elasticsearch DSL query
            :param qterm:
                Query-string
            :type qterm: ``str``
            :param kwargs:
                See below

            :Keyword Arguments:
                Only the following keys are read here; any other keyword
                argument is accepted and ignored by this method.

                * *include_spam* (``bool``) --
                  True: Include spam in search (Both)
                  False (default): mails whose subject matches 'spam' or
                  whose spam flag is 1 are filtered out
                * *only_attachment* (``bool``) --
                  True: Only find emails with attachments
                  False (default): emails with and without attachments (Both)
                * *mailq* (``str``) --
                  When not the empty string (default ''), only mails whose
                  fromEmail, toEmail or replyToEmail keyword field matches it
            :return: ``DslSearch`` Elasticsearch DSL query

            """
        # Query
        fields = [
            'body', 'fromEmail', 'toEmail', 'replyToEmail', 'fromName',
            'toName', 'replyToName', 'subject', 'attachmentNames'
        ]
        if helpers.is_simple_query_string_query(qterm):
            body_query = SimpleQueryString(
                query=qterm,
                fields=fields,
                default_operator='AND',
                boost=5,
            )
        else:
            all_fields_query = SimpleQueryString(
                query=qterm,
                fields=fields,
                default_operator='AND',
                boost=1,
            )
            phrase_query = MatchPhrase(body={'query': qterm, 'boost': 1})
            body_query = DisMax(
                tie_breaker=0.7,
                boost=1,
                queries=[all_fields_query, phrase_query],
            )

        common_query = Common(
            body={'query': qterm, 'cutoff_frequency': 0.001},
        )
        positive = DisMax(
            tie_breaker=0.7,
            boost=1,
            queries=[body_query, common_query],
        )

        # Penalize (but do not exclude) hits whose subject matches 'spam'.
        negative = Match(subject={'query': 'spam'})
        s = s.query(
            Boosting(positive=positive, negative=negative, negative_boost=0.2)
        )

        # Get specific query arguments
        include_spam = kwargs.get('include_spam', False)
        only_attachment = kwargs.get('only_attachment', False)
        mailq = kwargs.get('mailq', '')

        # Filter mail
        if mailq != '':
            address_match = (
                Match(**{'fromEmail.keyword': mailq})
                | Match(**{'toEmail.keyword': mailq})
                | Match(**{'replyToEmail.keyword': mailq})
            )
            s = s.filter(address_match)

        # Filter spam
        if not include_spam:
            s = s.filter(~Match(subject={'query': 'spam'}))
            # TODO: Spam-flag currently not in use, but for use with
            # different spam filter
            s = s.filter(~Term(spam=1))

        # Filter attachment
        if only_attachment:
            s = s.filter('term', hasAttachment=True)

        # Extra: per-index boosts, keyed by the formatted index name.
        s = s.extra(
            indices_boost={
                self._index_prefix.format(suffix): boost
                for suffix, boost in (('ja', 1.5), ('en', 1), ('un', 0.5))
            })
        # s = s.extra(_source={'excludes': ['body']})  # Body needed, no link available

        # Highlight
        s = s.highlight_options(order='score')
        s = s.highlight('body', fragment_size=50)
        # s = s.highlight('body', number_of_fragments=0)
        for highlighted_field in (
                'subject',
                'fromEmail',
                'toEmail',
                'replyToEmail',
                'fromEmail.keyword',
                'toEmail.keyword',
                'replyToEmail.keyword',
                'fromName',
                'toName',
                'replyToName',
                'attachmentNames',
        ):
            s = s.highlight(highlighted_field)

        return s
示例#13
0
# Ad-hoc script: run each keyword as a simple-query-string search against the
# ``context`` field and print the hits, then try a few exclude/filter variants.
search = Content.search()

print('\n *************************** \nsearch results:')

keywords = [
    '     صدا  و   سیما',
    '   قابل    مقــــایسه',
    'جبران',
    'عوارض'
    ]
regex_list = []
kwargs_list = []
for keyword in keywords:
    keyword_regex = get_keyword_regex(keyword)
    regex_list.append(keyword_regex)

    keyword_query = SimpleQueryString(
        query=keyword_regex,
        fields=['context'],
        default_operator='and',
    )
    kwargs_list.append(keyword_query)

    result = search.query(keyword_query).execute()
    print('\n' + keyword + ':')
    for hit in result.hits.hits:
        print(hit)

# result = search.query(Q('constant_score', filter=kwargs1)).execute()

# kwargs_exclude = SimpleQueryString(query=regex_list[1], fields=['context'], default_operator='not')

# Everything except documents matching the second keyword.
result3 = search.query().exclude(kwargs_list[1]).execute()
# Phrase-prefix and phrase filters on the first keyword.
result4 = search.query().filter(
    'match_phrase_prefix', context=regex_list[0]).execute()
result5 = search.query().filter(
    'match_phrase', context=regex_list[0]).execute()
# Phrase filter on the third keyword.
result6 = search.query().filter(
    'match_phrase', context=regex_list[2]).execute()
# result7 = search.query().filter('match_phrase', context=regex_list[2]).filter('match_phrase', context=regex_list[3]).execute()  #works
# result7 = search.query().filter(Q("match_phrase",  context=regex_list[2]) & Q("match_phrase", context=regex_list[3])).execute()
示例#14
0
def simple_search(query, fields, page=1):
    """Search ``fields`` for ``query`` via ``search_by_query``.

    The ``SimpleQueryString`` built from ``query`` and ``fields`` is converted
    to a dict and passed under the ``'query'`` key, together with ``page``.
    """
    body = {
        'query': SimpleQueryString(query=query, fields=fields).to_dict(),
    }
    return search_by_query(body, page)
示例#15
0
def simple_scan(query, fields):
    """Pass a ``SimpleQueryString`` for ``query``/``fields`` to ``scan_by_query``."""
    return scan_by_query(SimpleQueryString(query=query, fields=fields))