def _search_fields_for_token(self, token):
    """Build an OR-chain of boosted per-field queries for ``token``.

    One ``Q(field=token) ** boost`` clause is created for every entry of
    the default query parser's ``field_boosts`` mapping, except for the
    ``query_suggestion_basket`` and ``relations`` fields, and the clauses
    are combined with ``|``.

    :param token: the value matched against each field.
    :return: the combined query object, or ``None`` when no field is left
        after the exclusions.
    """
    excluded_fields = ('query_suggestion_basket', 'relations')
    field_boosts = DefaultQueryParser(self.env).field_boosts

    q_chain = None
    # ``items()`` rather than the Python-2-only ``iteritems()`` so the
    # method runs unchanged on both Python 2 and Python 3.
    for field, boost in field_boosts.items():
        if field in excluded_fields:
            continue
        boosted = self.solr_interface.Q(**{field: token}) ** boost
        if q_chain is None:
            q_chain = boosted
        else:
            q_chain |= boosted

    return q_chain
Пример #2
0
class MetaKeywordsParsingTestCase(BaseBloodhoundSearchTest):
    """Checks how ``DefaultQueryParser`` expands ``$``-prefixed meta keywords."""

    def setUp(self):
        super(MetaKeywordsParsingTestCase, self).setUp()
        self.parser = DefaultQueryParser(self.env)

    @staticmethod
    def _resolved_or_closed():
        # The query the tests expect "$resolved" to expand to.
        return query.Or([query.Term("status", "resolved"), query.Term("status", "closed")])

    def test_can_parse_keyword_ticket(self):
        actual = self.parser.parse("$ticket")
        expected = query.Term("type", "ticket")
        self.assertEqual(actual, expected)

    def test_can_parse_NOT_keyword_ticket(self):
        actual = self.parser.parse("NOT $ticket")
        expected = query.Not(query.Term("type", "ticket"))
        self.assertEqual(actual, expected)

    def test_can_parse_keyword_wiki(self):
        actual = self.parser.parse("$wiki")
        expected = query.Term("type", "wiki")
        self.assertEqual(actual, expected)

    def test_can_parse_keyword_resolved(self):
        actual = self.parser.parse("$resolved")
        self.assertEqual(actual, self._resolved_or_closed())

    def test_can_parse_meta_keywords_that_resolve_to_meta_keywords(self):
        # "$unresolved" is expected to come back as the negation of the
        # "$resolved" expansion.
        actual = self.parser.parse("$unresolved")
        self.assertEqual(actual, query.Not(self._resolved_or_closed()))

    def test_can_parse_complex_query(self):
        actual = self.parser.parse("content:test $ticket $unresolved")

        expected_parts = [
            query.Term("content", "test"),
            query.Term("type", "ticket"),
            query.Not(self._resolved_or_closed()),
        ]
        self.assertEqual(actual, query.And(expected_parts))

    def test_can_parse_keyword_me(self):
        user_context = self._mock_context_with_username("username")

        actual = self.parser.parse("author:$me", user_context)

        self.assertEqual(actual, query.Term("author", "username"))

    def test_can_parse_keyword_my(self):
        user_context = self._mock_context_with_username("username")

        actual = self.parser.parse("$my", user_context)

        self.assertEqual(actual, query.Term("owner", "username"))

    def _mock_context_with_username(self, username):
        # Only ``context.req.authname`` is populated on the mock.
        request = Mock(authname=username)
        return Mock(req=request)
Пример #3
0
 def setUp(self):
     # Run the base fixture setup, then create the collaborators used by
     # the tests; every component is constructed from the same self.env.
     super(WikiIndexerEventsTestCase, self).setUp()
     self.wiki_system = WikiSystem(self.env)
     self.whoosh_backend = WhooshBackend(self.env)
     # recreate_index() runs before every test -- presumably so each test
     # starts from an empty index (confirm against WhooshBackend).
     self.whoosh_backend.recreate_index()
     self.search_api = BloodhoundSearchApi(self.env)
     self.wiki_participant = WikiSearchParticipant(self.env)
     self.query_parser = DefaultQueryParser(self.env)
Пример #4
0
class WhooshEmptyFacetErrorWorkaroundTestCase(BaseBloodhoundSearchTest):
    """Tests for ``WhooshEmptyFacetErrorWorkaround``.

    Covers the null marker found in unset ticket fields of indexed
    documents and the filter rewriting done by ``query_pre_process``.
    """

    def setUp(self):
        super(WhooshEmptyFacetErrorWorkaroundTestCase, self).setUp()
        self.whoosh_backend = WhooshBackend(self.env)
        self.whoosh_backend.recreate_index()
        self.parser = DefaultQueryParser(self.env)
        self.empty_facet_workaround = WhooshEmptyFacetErrorWorkaround(self.env)

    def tearDown(self):
        # Remove the environment directory from disk, then reset the database.
        shutil.rmtree(self.env.path)
        self.env.reset_db()

    def test_set_should_not_be_empty_fields(self):
        # A ticket inserted with only a summary must be indexed with the
        # null marker in its component, status and milestone fields.
        self.insert_ticket("test x")
        result = self.whoosh_backend.query(query.Every())
        self.print_result(result)
        doc = result.docs[0]
        null_marker = WhooshEmptyFacetErrorWorkaround.NULL_MARKER
        self.assertEqual(null_marker, doc["component"])
        self.assertEqual(null_marker, doc["status"])
        self.assertEqual(null_marker, doc["milestone"])

    def test_can_fix_query_filter(self):
        # "NOT (milestone:*)" is expected to be rewritten in place into a
        # positive match on the "empty" value.
        parsed_filter = self.parser.parse_filters(
            ["type:ticket", "NOT (milestone:*)"])
        query_parameters = dict(filter=parsed_filter)
        self.empty_facet_workaround.query_pre_process(query_parameters)

        result_filter = query_parameters["filter"]
        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual('(type:ticket AND milestone:empty)',
                         str(result_filter))

    def test_does_interfere_query_filter_if_not_needed(self):
        # A filter without a "NOT (field:*)" clause must come back unchanged.
        parsed_filter = self.parser.parse_filters(
            ["type:ticket", "milestone:aaa"])
        query_parameters = dict(filter=parsed_filter)
        self.empty_facet_workaround.query_pre_process(query_parameters)

        result_filter = query_parameters["filter"]
        self.assertEqual('(type:ticket AND milestone:aaa)',
                         str(result_filter))
Пример #5
0
 def setUp(self):
     # Run the base fixture setup, then create the query parser under test
     # from the test environment.
     super(MetaKeywordsParsingTestCase, self).setUp()
     self.parser = DefaultQueryParser(self.env)
Пример #6
0
class MetaKeywordsParsingTestCase(BaseBloodhoundSearchTest):
    """Verifies the queries ``DefaultQueryParser`` builds for meta keywords
    such as ``$ticket``, ``$resolved``, ``$me`` and ``$my``."""

    def setUp(self):
        super(MetaKeywordsParsingTestCase, self).setUp()
        self.parser = DefaultQueryParser(self.env)

    def test_can_parse_keyword_ticket(self):
        outcome = self.parser.parse("$ticket")
        wanted = query.Term('type', 'ticket')
        self.assertEqual(outcome, wanted)

    def test_can_parse_NOT_keyword_ticket(self):
        outcome = self.parser.parse("NOT $ticket")
        ticket_term = query.Term('type', 'ticket')
        self.assertEqual(outcome, query.Not(ticket_term))

    def test_can_parse_keyword_wiki(self):
        outcome = self.parser.parse("$wiki")
        wanted = query.Term('type', 'wiki')
        self.assertEqual(outcome, wanted)

    def test_can_parse_keyword_resolved(self):
        outcome = self.parser.parse("$resolved")
        status_terms = [
            query.Term('status', status) for status in ('resolved', 'closed')
        ]
        self.assertEqual(outcome, query.Or(status_terms))

    def test_can_parse_meta_keywords_that_resolve_to_meta_keywords(self):
        outcome = self.parser.parse("$unresolved")
        status_terms = [
            query.Term('status', status) for status in ('resolved', 'closed')
        ]
        # "$unresolved" is expected to be the negated "$resolved" expansion.
        self.assertEqual(outcome, query.Not(query.Or(status_terms)))

    def test_can_parse_complex_query(self):
        outcome = self.parser.parse("content:test $ticket $unresolved")

        content_term = query.Term('content', 'test')
        type_term = query.Term('type', 'ticket')
        unresolved = query.Not(
            query.Or([
                query.Term('status', status)
                for status in ('resolved', 'closed')
            ]))
        wanted = query.And([content_term, type_term, unresolved])
        self.assertEqual(outcome, wanted)

    def test_can_parse_keyword_me(self):
        ctx = self._mock_context_with_username('username')

        outcome = self.parser.parse("author:$me", ctx)

        wanted = query.Term('author', 'username')
        self.assertEqual(outcome, wanted)

    def test_can_parse_keyword_my(self):
        ctx = self._mock_context_with_username('username')

        outcome = self.parser.parse("$my", ctx)

        wanted = query.Term('owner', 'username')
        self.assertEqual(outcome, wanted)

    def _mock_context_with_username(self, username):
        # Mocked context exposing only ``req.authname``.
        fake_request = Mock(authname=username)
        return Mock(req=fake_request)
Пример #7
0
 def setUp(self):
     # Run the base fixture setup, then create the backend under test and
     # the parser used to build queries for it.
     super(WhooshBackendTestCase, self).setUp()
     self.whoosh_backend = WhooshBackend(self.env)
     # Called before every test -- presumably to start from an empty index
     # (confirm against WhooshBackend.recreate_index).
     self.whoosh_backend.recreate_index()
     self.parser = DefaultQueryParser(self.env)
Пример #8
0
class WhooshBackendTestCase(BaseBloodhoundSearchTest):
    """Tests for ``WhooshBackend`` indexing and querying.

    Covers document retrieval, field selection, sorting, facet counts,
    filters on missing fields, datetime round-trips and highlighting.
    """

    def setUp(self):
        super(WhooshBackendTestCase, self).setUp()
        self.whoosh_backend = WhooshBackend(self.env)
        self.whoosh_backend.recreate_index()
        self.parser = DefaultQueryParser(self.env)

    def test_can_retrieve_docs(self):
        self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
        self.whoosh_backend.add_doc(dict(id="2", type="ticket"))
        result = self.whoosh_backend.query(
            query.Every(),
            sort=[SortInstruction("id", ASC)],
        )
        self.print_result(result)
        self.assertEqual(2, result.hits)
        docs = result.docs
        self.assertEqual(
            {
                'id': u'1',
                'type': u'ticket',
                'unique_id': u'ticket:1',
                'score': 0
            }, docs[0])
        self.assertEqual(
            {
                'id': u'2',
                'type': u'ticket',
                'unique_id': u'ticket:2',
                'score': 1
            }, docs[1])

    def test_can_return_all_fields(self):
        # Without a ``fields`` argument the document is expected to carry
        # id, type, unique_id and score.
        self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
        result = self.whoosh_backend.query(query.Every())
        self.print_result(result)
        docs = result.docs
        self.assertEqual(
            {
                'id': u'1',
                'type': u'ticket',
                'unique_id': u'ticket:1',
                "score": 1.0
            }, docs[0])

    def test_can_select_fields(self):
        # Only the requested fields may appear in the returned document.
        self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
        result = self.whoosh_backend.query(query.Every(),
                                           fields=("id", "type"))
        self.print_result(result)
        docs = result.docs
        self.assertEqual({'id': '1', 'type': 'ticket'}, docs[0])

    def test_can_survive_after_restart(self):
        # A second backend built on the same env must see the document
        # added through the first one.
        self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
        whoosh_backend2 = WhooshBackend(self.env)
        whoosh_backend2.add_doc(dict(id="2", type="ticket"))
        result = whoosh_backend2.query(query.Every())
        self.assertEqual(2, result.hits)

    def test_can_apply_multiple_sort_conditions_asc(self):
        self.whoosh_backend.add_doc(dict(id="2", type="ticket2"))
        self.whoosh_backend.add_doc(dict(id="3", type="ticket1"))
        self.whoosh_backend.add_doc(dict(id="4", type="ticket3"))
        self.whoosh_backend.add_doc(dict(id="1", type="ticket1"))
        result = self.whoosh_backend.query(
            query.Every(),
            sort=[SortInstruction("type", ASC),
                  SortInstruction("id", ASC)],
            fields=("id", "type"),
        )
        self.print_result(result)
        self.assertEqual([{
            'type': 'ticket1',
            'id': '1'
        }, {
            'type': 'ticket1',
            'id': '3'
        }, {
            'type': 'ticket2',
            'id': '2'
        }, {
            'type': 'ticket3',
            'id': '4'
        }], result.docs)

    def test_can_apply_multiple_sort_conditions_desc(self):
        self.whoosh_backend.add_doc(dict(id="2", type="ticket2"))
        self.whoosh_backend.add_doc(dict(id="3", type="ticket1"))
        self.whoosh_backend.add_doc(dict(id="4", type="ticket3"))
        self.whoosh_backend.add_doc(dict(id="1", type="ticket1"))
        result = self.whoosh_backend.query(
            query.Every(),
            sort=[SortInstruction("type", ASC),
                  SortInstruction("id", DESC)],
            fields=("id", "type"),
        )
        self.print_result(result)
        self.assertEqual([{
            'type': 'ticket1',
            'id': '3'
        }, {
            'type': 'ticket1',
            'id': '1'
        }, {
            'type': 'ticket2',
            'id': '2'
        }, {
            'type': 'ticket3',
            'id': '4'
        }], result.docs)

    def test_can_sort_by_score_and_date(self):
        the_first_date = datetime(2012, 12, 1)
        the_second_date = datetime(2012, 12, 2)
        the_third_date = datetime(2012, 12, 3)

        exact_match_string = "texttofind"
        not_exact_match_string = "texttofind bla"

        self.whoosh_backend.add_doc(
            dict(
                id="1",
                type="ticket",
                summary=not_exact_match_string,
                time=the_first_date,
            ))
        self.whoosh_backend.add_doc(
            dict(
                id="2",
                type="ticket",
                summary=exact_match_string,
                time=the_second_date,
            ))
        self.whoosh_backend.add_doc(
            dict(
                id="3",
                type="ticket",
                summary=not_exact_match_string,
                time=the_third_date,
            ))
        self.whoosh_backend.add_doc(
            dict(
                id="4",
                type="ticket",
                summary="some text out of search scope",
                time=the_third_date,
            ))

        parsed_query = self.parser.parse("summary:texttofind")

        result = self.whoosh_backend.query(
            parsed_query,
            sort=[SortInstruction(SCORE, ASC),
                  SortInstruction("time", DESC)],
        )
        self.print_result(result)
        self.assertEqual(3, result.hits)
        docs = result.docs
        # must be found first, because the highest score (of exact match)
        self.assertEqual("2", docs[0]["id"])
        # must be found second, because the time order DESC
        self.assertEqual("3", docs[1]["id"])
        # must be found third, because the time order DESC
        self.assertEqual("1", docs[2]["id"])

    def test_can_do_facet_count(self):
        self.whoosh_backend.add_doc(dict(id="1", type="ticket", product="A"))
        self.whoosh_backend.add_doc(dict(id="2", type="ticket", product="B"))
        self.whoosh_backend.add_doc(dict(id="3", type="wiki", product="A"))
        result = self.whoosh_backend.query(
            query.Every(),
            sort=[SortInstruction("type", ASC),
                  SortInstruction("id", DESC)],
            fields=("id", "type"),
            facets=("type", "product"))
        self.print_result(result)
        self.assertEqual(3, result.hits)
        facets = result.facets
        self.assertEqual({"ticket": 2, "wiki": 1}, facets["type"])
        self.assertEqual({"A": 2, "B": 1}, facets["product"])

    def test_can_do_facet_if_filed_missing_TODO(self):
        # The document without a status is expected to be counted under
        # the ``None`` facet key.
        self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
        self.whoosh_backend.add_doc(dict(id="2", type="ticket", status="New"))
        result = self.whoosh_backend.query(query.Every(),
                                           facets=("type", "status"))
        self.print_result(result)
        self.assertEqual(2, result.hits)
        facets = result.facets
        self.assertEqual({"ticket": 2}, facets["type"])
        self.assertEqual({None: 1, 'New': 1}, facets["status"])

    def test_can_return_empty_result(self):
        # Querying an index with no documents must report zero hits.
        result = self.whoosh_backend.query(
            query.Every(),
            sort=[SortInstruction("type", ASC),
                  SortInstruction("id", DESC)],
            fields=("id", "type"),
            facets=("type", "product"))
        self.print_result(result)
        self.assertEqual(0, result.hits)

    def test_can_search_time_with_utc_tzinfo(self):
        time = datetime(2012,
                        12,
                        13,
                        11,
                        8,
                        34,
                        711957,
                        tzinfo=FixedOffset(0, 'UTC'))
        self.whoosh_backend.add_doc(dict(id="1", type="ticket", time=time))
        result = self.whoosh_backend.query(query.Every())
        self.print_result(result)
        self.assertEqual(time, result.docs[0]["time"])

    def test_can_search_time_without_tzinfo(self):
        # A naive datetime is expected back with ``utc`` attached.
        time = datetime(2012, 12, 13, 11, 8, 34, 711957, tzinfo=None)
        self.whoosh_backend.add_doc(dict(id="1", type="ticket", time=time))
        result = self.whoosh_backend.query(query.Every())
        self.print_result(result)
        self.assertEqual(time.replace(tzinfo=utc), result.docs[0]["time"])

    def test_can_search_time_with_non_utc_tzinfo(self):
        # Named ``minute`` because the value is passed in datetime's minute
        # position; the expected UTC value has ``tz_diff`` subtracted there.
        minute = 8
        tz_diff = 1
        time = datetime(2012,
                        12,
                        13,
                        11,
                        minute,
                        34,
                        711957,
                        tzinfo=FixedOffset(tz_diff, "just_one_timezone"))
        self.whoosh_backend.add_doc(dict(id="1", type="ticket", time=time))
        result = self.whoosh_backend.query(query.Every())
        self.print_result(result)
        self.assertEqual(
            datetime(2012, 12, 13, 11, minute - tz_diff, 34, 711957,
                     tzinfo=utc), result.docs[0]["time"])

    def test_can_apply_filter_and_facet(self):
        self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
        self.whoosh_backend.add_doc(dict(id="2", type="wiki"))
        result = self.whoosh_backend.query(query.Every(),
                                           filter=query.Term("type", "ticket"),
                                           facets=["type"])
        self.print_result(result)
        self.assertEqual(1, result.hits)
        self.assertEqual("ticket", result.docs[0]["type"])

    @unittest.skip("TODO clarify behavior on Whoosh mail list")
    def test_can_search_id_and_summary_TODO(self):
        # arrange
        self.insert_ticket("test x")
        self.insert_ticket("test 1")

        fieldboosts = dict(
            id=1,
            summary=1,
        )

        mfp = MultifieldPlugin(list(fieldboosts.keys()), )
        pins = [WhitespacePlugin, PhrasePlugin, mfp]
        parser = QueryParser(None, WhooshBackend.SCHEMA, plugins=pins)

        parsed_query = parser.parse("1")
        result = self.whoosh_backend.query(parsed_query)
        self.print_result(result)
        self.assertEqual(2, result.hits)

    def test_no_index_error_when_counting_facet_on_missing_field(self):
        """
        Whoosh 2.4.1 raises "IndexError: list index out of range"
        when search contains facets on field that is missing in at least one
        document in the index. The error manifests only when index contains
        more than one segment

        Introduced workaround should solve this problem.
        """
        # add more tickets to make sure we have more than one segment in index
        count = 20
        for i in range(count):
            self.insert_ticket("test %s" % i)

        result = self.whoosh_backend.query(query.Every(), facets=["milestone"])
        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(count, result.hits)

    def test_can_query_missing_field_and_type(self):
        self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
        self.whoosh_backend.add_doc(dict(id="2", type="ticket", milestone="A"))
        self.whoosh_backend.add_doc(dict(id="3", type="wiki"))
        # Local is not called ``filter`` so the builtin is not shadowed.
        parsed_filter = self.parser.parse_filters(
            ["NOT (milestone:*)", "type:ticket"])
        result = self.whoosh_backend.query(
            query.Every(),
            filter=parsed_filter,
        )
        self.print_result(result)
        self.assertEqual(1, result.hits)
        self.assertEqual("1", result.docs[0]["id"])

    def test_can_query_missing_field(self):
        self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
        self.whoosh_backend.add_doc(dict(id="2", type="ticket", milestone="A"))
        parsed_filter = self.parser.parse_filters(["NOT (milestone:*)"])
        result = self.whoosh_backend.query(
            query.Every(),
            filter=parsed_filter,
        )
        self.print_result(result)
        self.assertEqual(1, result.hits)
        self.assertEqual("1", result.docs[0]["id"])

    @unittest.skip("TODO clarify behavior on Whoosh mail list")
    def test_can_query_missing_field_and_type_with_no_results(self):
        self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
        self.whoosh_backend.add_doc(dict(id="3", type="wiki"))
        parsed_filter = self.parser.parse_filters(
            ["NOT (milestone:*)", "type:ticket"])
        result = self.whoosh_backend.query(
            query.Every(),
            filter=parsed_filter,
        )
        self.print_result(result)
        self.assertEqual(0, result.hits)

    def test_can_highlight_given_terms(self):
        term = 'search_term'
        text = "foo foo %s bar bar" % term
        self.whoosh_backend.add_doc(dict(id="1", type="ticket", content=text))
        self.whoosh_backend.add_doc(dict(id="3", type="wiki", content=text))
        search_query = self.parser.parse(term)

        result = self.whoosh_backend.query(
            search_query,
            highlight=True,
            highlight_fields=['content', 'summary'])
        self.print_result(result)

        self.assertEqual(len(result.highlighting), 2)
        for highlight in result.highlighting:
            self.assertIn(self._highlighted(term), highlight['content'])
            # No document has a summary, so its highlight must be empty.
            self.assertEqual("", highlight['summary'])

    def test_that_highlighting_escapes_html(self):
        term = 'search_term'
        text = "bla <a href=''>%s bar</a> bla" % term
        self.whoosh_backend.add_doc(dict(id="1", type="ticket", content=text))
        search_query = self.parser.parse(term)

        result = self.whoosh_backend.query(search_query,
                                           highlight=True,
                                           highlight_fields=['content'])
        self.print_result(result)

        self.assertEqual(len(result.highlighting), 1)
        highlight = result.highlighting[0]
        # Markup from the document is escaped; only the <em> wrapper added
        # by highlighting stays as real markup.
        self.assertEqual(
            "bla &lt;a href=''&gt;<em>search_term</em> bar&lt;/a&gt; bla",
            highlight['content'])

    def test_highlights_all_text_fields_by_default(self):
        term = 'search_term'
        text = "foo foo %s bar bar" % term
        self.whoosh_backend.add_doc(dict(id="1", type="ticket", content=text))
        self.whoosh_backend.add_doc(dict(id="3", type="wiki", content=text))
        search_query = self.parser.parse(term)

        result = self.whoosh_backend.query(
            search_query,
            highlight=True,
        )
        self.print_result(result)

        self.assertEqual(len(result.highlighting), 2)
        for highlight in result.highlighting:
            self.assertIn('content', highlight)
            self.assertIn('summary', highlight)
            self.assertIn(self._highlighted(term), highlight['content'])

    def test_only_highlights_terms_in_fields_that_match_query(self):
        term = 'search_term'
        self.whoosh_backend.add_doc(dict(id=term, type="wiki", content=term))
        self.whoosh_backend.add_doc(dict(id=term, type="ticket", summary=term))
        search_query = self.parser.parse('id:%s' % term)

        result = self.whoosh_backend.query(
            search_query,
            highlight=True,
            highlight_fields=["id", "content", "summary"])
        self.print_result(result)

        self.assertEqual(len(result.highlighting), 2)
        for highlight in result.highlighting:
            self.assertIn(self._highlighted(term), highlight['id'])
            self.assertNotIn(self._highlighted(term), highlight['summary'])
            self.assertNotIn(self._highlighted(term), highlight['content'])

    def _highlighted(self, term):
        # Markup the tests expect around a highlighted term.
        return '<em>%s</em>' % term
Пример #9
0
 def setUp(self):
     # Run the base fixture setup (asking it for create_req=True), recreate
     # the Whoosh index, then create the API, ticket participant and query
     # parser used by the tests -- all from the same self.env.
     super(ApiQueryWithWhooshTestCase, self).setUp(create_req=True)
     # The backend instance is not kept; it is only used to recreate the index.
     WhooshBackend(self.env).recreate_index()
     self.search_api = BloodhoundSearchApi(self.env)
     self.ticket_participant = TicketSearchParticipant(self.env)
     self.query_parser = DefaultQueryParser(self.env)
Пример #10
0
 def setUp(self):
     # Run the base fixture setup, then create the backend (with a recreated
     # index), the query parser and the workaround component under test.
     super(WhooshEmptyFacetErrorWorkaroundTestCase, self).setUp()
     self.whoosh_backend = WhooshBackend(self.env)
     self.whoosh_backend.recreate_index()
     self.parser = DefaultQueryParser(self.env)
     self.empty_facet_workaround = WhooshEmptyFacetErrorWorkaround(self.env)
Пример #11
0
 def setUp(self):
     # Run the base fixture setup, then create the backend under test (with
     # a recreated index) and the parser used to build queries for it.
     super(WhooshBackendTestCase, self).setUp()
     self.whoosh_backend = WhooshBackend(self.env)
     self.whoosh_backend.recreate_index()
     self.parser = DefaultQueryParser(self.env)
Пример #12
0
class WhooshBackendTestCase(BaseBloodhoundSearchTest):
    def setUp(self):
        # Run the base fixture setup, then create the backend under test
        # (with a recreated index) and the parser used to build queries.
        super(WhooshBackendTestCase, self).setUp()
        self.whoosh_backend = WhooshBackend(self.env)
        self.whoosh_backend.recreate_index()
        self.parser = DefaultQueryParser(self.env)

    def test_can_retrieve_docs(self):
        # Index two tickets, then fetch everything ordered by ascending id.
        backend = self.whoosh_backend
        for doc_id in ("1", "2"):
            backend.add_doc(dict(id=doc_id, type="ticket"))

        result = backend.query(query.Every(), sort=[SortInstruction("id", ASC)])
        self.print_result(result)

        self.assertEqual(2, result.hits)
        found = result.docs
        expected_first = {
            'id': u'1',
            'type': u'ticket',
            'unique_id': u'empty:ticket:1',
            'score': u'1',
        }
        self.assertEqual(expected_first, found[0])
        expected_second = {
            'id': u'2',
            'type': u'ticket',
            'unique_id': u'empty:ticket:2',
            'score': u'2',
        }
        self.assertEqual(expected_second, found[1])

    def test_can_return_all_fields(self):
        # Without a ``fields`` argument the document is expected to carry
        # id, type, unique_id and score.
        ticket = dict(id="1", type="ticket")
        self.whoosh_backend.add_doc(ticket)
        match_all = query.Every()
        result = self.whoosh_backend.query(match_all)
        self.print_result(result)
        found = result.docs
        expected = {
            'id': u'1',
            'type': u'ticket',
            'unique_id': u'empty:ticket:1',
            "score": 1.0,
        }
        self.assertEqual(expected, found[0])

    def test_can_select_fields(self):
        # Only the requested fields may appear in the returned document.
        ticket = dict(id="1", type="ticket")
        self.whoosh_backend.add_doc(ticket)
        result = self.whoosh_backend.query(
            query.Every(), fields=("id", "type"))
        self.print_result(result)
        found = result.docs
        expected = {'id': '1', 'type': 'ticket'}
        self.assertEqual(expected, found[0])

    def test_can_survive_after_restart(self):
        # A second backend built on the same env must see the document
        # added through the first one.
        self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
        restarted_backend = WhooshBackend(self.env)
        restarted_backend.add_doc(dict(id="2", type="ticket"))
        hit_count = restarted_backend.query(query.Every()).hits
        self.assertEqual(2, hit_count)

    def test_can_apply_multiple_sort_conditions_asc(self):
        # Documents are added out of order; sorting by type and then by id
        # (both ascending) must produce the listing asserted below.
        unsorted_docs = (
            ("2", "ticket2"),
            ("3", "ticket1"),
            ("4", "ticket3"),
            ("1", "ticket1"),
        )
        for doc_id, doc_type in unsorted_docs:
            self.whoosh_backend.add_doc(dict(id=doc_id, type=doc_type))

        match_all = query.Every()
        ordering = [SortInstruction("type", ASC), SortInstruction("id", ASC)]
        result = self.whoosh_backend.query(
            match_all, sort=ordering, fields=("id", "type"))
        self.print_result(result)

        expected_docs = [
            {'type': 'ticket1', 'id': '1'},
            {'type': 'ticket1', 'id': '3'},
            {'type': 'ticket2', 'id': '2'},
            {'type': 'ticket3', 'id': '4'},
        ]
        self.assertEqual(expected_docs, result.docs)

    def test_can_apply_multiple_sort_conditions_desc(self):
        # Same fixture as the ascending case, but ids are sorted descending
        # within each type.
        unsorted_docs = (
            ("2", "ticket2"),
            ("3", "ticket1"),
            ("4", "ticket3"),
            ("1", "ticket1"),
        )
        for doc_id, doc_type in unsorted_docs:
            self.whoosh_backend.add_doc(dict(id=doc_id, type=doc_type))

        match_all = query.Every()
        ordering = [SortInstruction("type", ASC), SortInstruction("id", DESC)]
        result = self.whoosh_backend.query(
            match_all, sort=ordering, fields=("id", "type"))
        self.print_result(result)

        expected_docs = [
            {'type': 'ticket1', 'id': '3'},
            {'type': 'ticket1', 'id': '1'},
            {'type': 'ticket2', 'id': '2'},
            {'type': 'ticket3', 'id': '4'},
        ]
        self.assertEqual(expected_docs, result.docs)

    def test_can_sort_by_score_and_date(self):
        first_day = datetime(2012, 12, 1)
        second_day = datetime(2012, 12, 2)
        third_day = datetime(2012, 12, 3)

        exact_summary = "texttofind"
        partial_summary = "texttofind bla"

        # (id, summary, time) of every indexed ticket; ticket "4" does not
        # contain the searched text.
        fixtures = (
            ("1", partial_summary, first_day),
            ("2", exact_summary, second_day),
            ("3", partial_summary, third_day),
            ("4", "some text out of search scope", third_day),
        )
        for doc_id, summary, when in fixtures:
            self.whoosh_backend.add_doc(dict(
                id=doc_id,
                type="ticket",
                summary=summary,
                time=when,
            ))

        parsed_query = self.parser.parse("summary:texttofind")
        ordering = [SortInstruction(SCORE, ASC), SortInstruction("time", DESC)]
        result = self.whoosh_backend.query(parsed_query, sort=ordering)
        self.print_result(result)

        self.assertEqual(3, result.hits)
        found = result.docs
        # "2" must come first because of the highest score (exact match);
        # "3" and then "1" follow because of the DESC time order.
        for position, expected_id in enumerate(("2", "3", "1")):
            self.assertEqual(expected_id, found[position]["id"])

    def test_can_do_facet_count(self):
        # (id, type, product) of the indexed documents.
        fixtures = (
            ("1", "ticket", "A"),
            ("2", "ticket", "B"),
            ("3", "wiki", "A"),
        )
        for doc_id, doc_type, product in fixtures:
            self.whoosh_backend.add_doc(
                dict(id=doc_id, type=doc_type, product=product))

        match_all = query.Every()
        ordering = [SortInstruction("type", ASC), SortInstruction("id", DESC)]
        result = self.whoosh_backend.query(
            match_all,
            sort=ordering,
            fields=("id", "type"),
            facets=("type", "product"),
        )
        self.print_result(result)

        self.assertEqual(3, result.hits)
        counts = result.facets
        self.assertEqual({"ticket": 2, "wiki": 1}, counts["type"])
        self.assertEqual({"A": 2, "B": 1}, counts["product"])

    def test_can_do_facet_if_filed_missing_TODO(self):
        # The document without a status is expected to be counted under
        # the ``None`` facet key.
        backend = self.whoosh_backend
        backend.add_doc(dict(id="1", type="ticket"))
        backend.add_doc(dict(id="2", type="ticket", status="New"))

        result = backend.query(query.Every(), facets=("type", "status"))
        self.print_result(result)

        self.assertEqual(2, result.hits)
        counts = result.facets
        self.assertEqual({"ticket": 2}, counts["type"])
        self.assertEqual({None: 1, 'New': 1}, counts["status"])

    def test_can_return_empty_result(self):
        # Nothing is indexed here, so the query must report zero hits.
        match_all = query.Every()
        ordering = [SortInstruction("type", ASC), SortInstruction("id", DESC)]
        result = self.whoosh_backend.query(
            match_all,
            sort=ordering,
            fields=("id", "type"),
            facets=("type", "product"),
        )
        self.print_result(result)
        self.assertEqual(0, result.hits)

    def test_can_search_time_with_utc_tzinfo(self):
        # A datetime carrying a zero FixedOffset must come back equal to
        # the value that was indexed.
        utc_zone = FixedOffset(0, 'UTC')
        stored_time = datetime(2012, 12, 13, 11, 8, 34, 711957, tzinfo=utc_zone)
        ticket = dict(id="1", type="ticket", time=stored_time)
        self.whoosh_backend.add_doc(ticket)
        result = self.whoosh_backend.query(query.Every())
        self.print_result(result)
        self.assertEqual(stored_time, result.docs[0]["time"])

    def test_can_search_time_without_tzinfo(self):
        # A naive datetime is expected back with ``utc`` attached.
        naive_time = datetime(2012, 12, 13, 11, 8, 34, 711957, tzinfo=None)
        ticket = dict(id="1", type="ticket", time=naive_time)
        self.whoosh_backend.add_doc(ticket)
        result = self.whoosh_backend.query(query.Every())
        self.print_result(result)
        expected = naive_time.replace(tzinfo=utc)
        self.assertEqual(expected, result.docs[0]["time"])

    def test_can_search_time_with_non_utc_tzinfo(self):
        """A ``time`` value in a non-UTC zone is expected back in UTC.

        The varying number is passed as the *minute* argument of
        ``datetime``; the expected value subtracts the zone offset from that
        same field.
        """
        minute = 8
        offset = 1
        local_stamp = datetime(
            2012, 12, 13, 11, minute, 34, 711957,
            tzinfo=FixedOffset(offset, "just_one_timezone"))
        self.whoosh_backend.add_doc(
            {"id": "1", "type": "ticket", "time": local_stamp})

        result = self.whoosh_backend.query(query.Every())
        self.print_result(result)

        expected = datetime(
            2012, 12, 13, 11, minute - offset, 34, 711957, tzinfo=utc)
        self.assertEqual(expected, result.docs[0]["time"])


    def test_can_apply_filter_and_facet(self):
        """Combine a ``type`` filter with a ``type`` facet in one query.

        Only the ticket document is expected to survive the filter.
        """
        for doc_id, doc_type in (("1", "ticket"), ("2", "wiki")):
            self.whoosh_backend.add_doc({"id": doc_id, "type": doc_type})

        tickets_only = query.Term("type", "ticket")
        result = self.whoosh_backend.query(
            query.Every(), filter=tickets_only, facets=["type"])
        self.print_result(result)

        self.assertEqual(1, result.hits)
        first_doc = result.docs[0]
        self.assertEqual("ticket", first_doc["type"])

    @unittest.skip("TODO clarify behavior on Whoosh mail list")
    def test_can_search_id_and_summary_TODO(self):
        """Search "1" across ``id`` and ``summary`` with a hand-built parser.

        Currently skipped; the expectation is that both inserted tickets
        are found.
        """
        # Arrange: two tickets, one of which has "1" in its summary.
        for summary in ("test x", "test 1"):
            self.insert_ticket(summary)

        boosts = {"id": 1, "summary": 1}
        multifield = MultifieldPlugin(list(boosts.keys()))
        plugins = [WhitespacePlugin, PhrasePlugin, multifield]
        parser = QueryParser(None, WhooshBackend.SCHEMA, plugins=plugins)

        # Act
        result = self.whoosh_backend.query(parser.parse("1"))
        self.print_result(result)

        # Assert
        self.assertEqual(2, result.hits)

    def test_no_index_error_when_counting_facet_on_missing_field(self):
        """
        Whoosh 2.4.1 raises "IndexError: list index out of range"
        when a search contains facets on a field that is missing in at least
        one document in the index. The error manifests only when the index
        contains more than one segment.

        The introduced workaround should solve this problem.
        """
        # Add more tickets to make sure we have more than one segment in
        # the index.
        count = 20
        for i in range(count):
            self.insert_ticket("test %s" % (i))

        result = self.whoosh_backend.query(
            query.Every(),
            facets=["milestone"]
        )
        # assertEqual rather than the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(count, result.hits)

    def test_can_query_missing_field_and_type(self):
        """Filter for tickets that have no milestone value.

        Of the three documents added, only ticket "1" is expected to match
        both filter expressions.
        """
        documents = (
            {"id": "1", "type": "ticket"},
            {"id": "2", "type": "ticket", "milestone": "A"},
            {"id": "3", "type": "wiki"},
        )
        for document in documents:
            self.whoosh_backend.add_doc(document)

        # Local is not called ``filter`` so the builtin stays unshadowed.
        parsed_filter = self.parser.parse_filters(
            ["NOT (milestone:*)", "type:ticket"])
        result = self.whoosh_backend.query(
            query.Every(), filter=parsed_filter)
        self.print_result(result)

        self.assertEqual(1, result.hits)
        self.assertEqual("1", result.docs[0]["id"])


    def test_can_query_missing_field(self):
        """Filter for documents that have no milestone value.

        Ticket "1" has no milestone and is the only expected hit.
        """
        documents = (
            {"id": "1", "type": "ticket"},
            {"id": "2", "type": "ticket", "milestone": "A"},
        )
        for document in documents:
            self.whoosh_backend.add_doc(document)

        # Local is not called ``filter`` so the builtin stays unshadowed.
        parsed_filter = self.parser.parse_filters(["NOT (milestone:*)"])
        result = self.whoosh_backend.query(
            query.Every(), filter=parsed_filter)
        self.print_result(result)

        self.assertEqual(1, result.hits)
        self.assertEqual("1", result.docs[0]["id"])


    @unittest.skip("TODO clarify behavior on Whoosh mail list")
    def test_can_query_missing_field_and_type_with_no_results(self):
        """Same filter as the missing-field-and-type case, expecting no hits.

        Currently skipped pending clarification of the Whoosh behaviour.
        """
        for doc_id, doc_type in (("1", "ticket"), ("3", "wiki")):
            self.whoosh_backend.add_doc({"id": doc_id, "type": doc_type})

        # Local is not called ``filter`` so the builtin stays unshadowed.
        parsed_filter = self.parser.parse_filters(
            ["NOT (milestone:*)", "type:ticket"])
        result = self.whoosh_backend.query(
            query.Every(), filter=parsed_filter)
        self.print_result(result)

        self.assertEqual(0, result.hits)

    def test_can_highlight_given_terms(self):
        """Request highlighting for an explicit list of fields.

        Both documents carry the term in ``content`` only, so each highlight
        entry must contain the marked-up term under ``content`` and an empty
        string under ``summary``.
        """
        term = 'search_term'
        text = "foo foo %s bar bar" % term
        self.whoosh_backend.add_doc(dict(id="1", type="ticket", content=text))
        self.whoosh_backend.add_doc(dict(id="3", type="wiki", content=text))
        search_query = self.parser.parse(term)

        result = self.whoosh_backend.query(
            search_query,
            highlight=True,
            highlight_fields=['content', 'summary']
        )
        self.print_result(result)

        self.assertEqual(len(result.highlighting), 2)
        for highlight in result.highlighting:
            self.assertIn(self._highlighted(term), highlight['content'])
            # assertEqual rather than the deprecated assertEquals alias,
            # which was removed from unittest in Python 3.12.
            self.assertEqual("", highlight['summary'])

    def test_that_highlighting_escapes_html(self):
        """Markup in indexed content must be escaped in the highlight output.

        Only the ``<em>`` tags wrapping the matched term may appear
        unescaped in the highlighted ``content``.
        """
        term = 'search_term'
        text = "bla <a href=''>%s bar</a> bla" % term
        self.whoosh_backend.add_doc(dict(id="1", type="ticket", content=text))
        search_query = self.parser.parse(term)

        result = self.whoosh_backend.query(
            search_query,
            highlight=True,
            highlight_fields=['content']
        )
        self.print_result(result)

        self.assertEqual(len(result.highlighting), 1)
        highlight = result.highlighting[0]
        # assertEqual rather than the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(
            "bla &lt;a href=''&gt;<em>search_term</em> bar&lt;/a&gt; bla",
            highlight['content'])

    def test_highlights_all_text_fields_by_default(self):
        """Request highlighting without naming any ``highlight_fields``.

        Every highlight entry is expected to have ``content`` and
        ``summary`` keys, with the marked-up term inside ``content``.
        """
        needle = 'search_term'
        body = "foo foo %s bar bar" % needle
        for doc_id, doc_type in (("1", "ticket"), ("3", "wiki")):
            self.whoosh_backend.add_doc(
                {"id": doc_id, "type": doc_type, "content": body})
        search_query = self.parser.parse(needle)

        result = self.whoosh_backend.query(search_query, highlight=True)
        self.print_result(result)

        self.assertEqual(len(result.highlighting), 2)
        for entry in result.highlighting:
            for field in ('content', 'summary'):
                self.assertIn(field, entry)
            self.assertIn(self._highlighted(needle), entry['content'])

    def test_only_highlights_terms_in_fields_that_match_query(self):
        """Query the ``id`` field only while the term is in other fields too.

        The marked-up term must appear in the ``id`` highlight and must not
        appear in the ``summary`` or ``content`` highlights.
        """
        needle = 'search_term'
        documents = (
            {"id": needle, "type": "wiki", "content": needle},
            {"id": needle, "type": "ticket", "summary": needle},
        )
        for document in documents:
            self.whoosh_backend.add_doc(document)
        search_query = self.parser.parse('id:%s' % needle)

        result = self.whoosh_backend.query(
            search_query,
            highlight=True,
            highlight_fields=["id", "content", "summary"],
        )
        self.print_result(result)

        self.assertEqual(len(result.highlighting), 2)
        for entry in result.highlighting:
            self.assertIn(self._highlighted(needle), entry['id'])
            for unmatched_field in ('summary', 'content'):
                self.assertNotIn(
                    self._highlighted(needle), entry[unmatched_field])

    def _highlighted(self, term):
        return '<em>%s</em>' % term
Пример #13
0
 def setUp(self):
     """Run the base class set-up, then create the parser under test."""
     super(MetaKeywordsParsingTestCase, self).setUp()
     environment = self.env
     self.parser = DefaultQueryParser(environment)
Пример #14
0
class MetaKeywordsParsingTestCase(BaseBloodhoundSearchTest):
    """Tests for how ``DefaultQueryParser`` parses ``$``-prefixed keywords."""

    def setUp(self):
        super(MetaKeywordsParsingTestCase, self).setUp()
        self.parser = DefaultQueryParser(self.env)

    def _resolved_or_closed(self):
        """Build the status query the ``$resolved`` tests compare against."""
        return nary.Or([terms.Term('status', 'resolved'),
                        terms.Term('status', 'closed')])

    def _mock_context_with_username(self, username):
        """Return a mock context whose ``req.authname`` is *username*."""
        request = Mock(authname=username)
        return Mock(req=request)

    def test_can_parse_keyword_ticket(self):
        actual = self.parser.parse("$ticket")
        expected = terms.Term('type', 'ticket')
        self.assertEqual(actual, expected)

    def test_can_parse_NOT_keyword_ticket(self):
        actual = self.parser.parse("NOT $ticket")
        expected = wrappers.Not(terms.Term('type', 'ticket'))
        self.assertEqual(actual, expected)

    def test_can_parse_keyword_wiki(self):
        actual = self.parser.parse("$wiki")
        expected = terms.Term('type', 'wiki')
        self.assertEqual(actual, expected)

    def test_can_parse_keyword_resolved(self):
        actual = self.parser.parse("$resolved")
        self.assertEqual(actual, self._resolved_or_closed())

    def test_can_parse_meta_keywords_that_resolve_to_meta_keywords(self):
        actual = self.parser.parse("$unresolved")
        expected = wrappers.Not(self._resolved_or_closed())
        self.assertEqual(actual, expected)

    def test_can_parse_complex_query(self):
        actual = self.parser.parse("content:test $ticket $unresolved")

        clauses = [
            terms.Term('content', 'test'),
            terms.Term('type', 'ticket'),
            wrappers.Not(self._resolved_or_closed()),
        ]
        self.assertEqual(actual, nary.And(clauses))

    def test_can_parse_keyword_me(self):
        context = self._mock_context_with_username('username')

        actual = self.parser.parse("author:$me", context)

        expected = terms.Term('author', 'username')
        self.assertEqual(actual, expected)

    def test_can_parse_keyword_my(self):
        context = self._mock_context_with_username('username')

        actual = self.parser.parse("$my", context)

        expected = terms.Term('owner', 'username')
        self.assertEqual(actual, expected)