Exemplo n.º 1
0
    def test_search_more_like(self):
        """Check the ``more_like`` search against three indexed documents.

        The search API is asked for documents similar to the second
        document; exactly the two other documents are expected back, with
        the second bank statement listed before the invoice.
        """
        invoice = Document.objects.create(
            title="invoice",
            content="the thing i bought at a shop and paid with bank account",
            checksum="A",
            pk=1,
        )
        august = Document.objects.create(
            title="bank statement 1",
            content="things i paid for in august",
            pk=2,
            checksum="B",
        )
        september = Document.objects.create(
            title="bank statement 3",
            content="things i paid for in september",
            pk=3,
            checksum="C",
        )

        # Index all three documents through one shared writer.
        with AsyncWriter(index.open_index()) as writer:
            for document in (invoice, august, september):
                index.update_document(writer, document)

        response = self.client.get(f"/api/search/?more_like={august.id}")
        self.assertEqual(response.status_code, 200)

        hits = response.data['results']
        self.assertEqual(len(hits), 2)
        self.assertEqual(hits[0]['id'], september.id)
        self.assertEqual(hits[1]['id'], invoice.id)
Exemplo n.º 2
0
    def test_search_invalid_page(self):
        """Check that unusable ``page`` values yield the first page.

        Fifteen matching documents are indexed. Requests with page ``0``,
        a non-numeric page, an empty page and a negative page must all
        return exactly the same payload as page 1, and page 2 must hold a
        different number of results than page 1.
        """
        with index.open_index(False).writer() as writer:
            for number in range(1, 16):
                doc = Document.objects.create(
                    checksum=str(number - 1),
                    pk=number,
                    title=f"Document {number}",
                    content="content",
                )
                index.update_document(writer, doc)

        def fetch(page):
            # Return the response payload for the given raw ``page`` value.
            url = f"/api/search/?query=content&page={page}"
            return self.client.get(url).data

        first_page = fetch("1")
        second_page = fetch("2")

        # All requests are issued before any assertion runs.
        fallbacks = [fetch(bad) for bad in ("0", "dgfd", "", "-7868")]

        for fallback in fallbacks:
            self.assertDictEqual(first_page, fallback)

        first_len = len(first_page['results'])
        second_len = len(second_page['results'])
        self.assertNotEqual(first_len, second_len)
Exemplo n.º 3
0
    def test_auto_complete(self):
        """Check ``index.autocomplete`` for the prefix ``"tes"``.

        Three documents with overlapping words are indexed, then the
        completion list is checked with no limit and with limits of 3, 1
        and 0.
        """
        fixtures = (
            ("doc1", "A", "test test2 test3"),
            ("doc2", "B", "test test2"),
            ("doc3", "C", "test2"),
        )

        # Create every document first, then index them in the same order.
        documents = [
            Document.objects.create(title=title,
                                    checksum=checksum,
                                    content=content)
            for title, checksum, content in fixtures
        ]
        for document in documents:
            index.add_or_update_document(document)

        ix = index.open_index()
        expected = [b"test3", b"test", b"test2"]

        self.assertListEqual(index.autocomplete(ix, "tes"), expected)
        self.assertListEqual(index.autocomplete(ix, "tes", limit=3), expected)
        self.assertListEqual(index.autocomplete(ix, "tes", limit=1),
                             expected[:1])
        self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
Exemplo n.º 4
0
def index_reindex():
    """Rebuild the search index from all documents.

    The index is opened with ``recreate=True`` and every ``Document`` is
    passed to ``index.update_document`` through one ``AsyncWriter``. The
    iteration is wrapped in ``tqdm`` for progress display.
    """
    all_documents = Document.objects.all()
    fresh_index = index.open_index(recreate=True)

    with AsyncWriter(fresh_index) as writer:
        for current in tqdm.tqdm(all_documents):
            index.update_document(writer, current)
Exemplo n.º 5
0
def index_reindex(progress_bar_disable=False):
    """Rebuild the search index from all documents.

    The index is opened with ``recreate=True`` and every ``Document`` is
    passed to ``index.update_document`` through one ``AsyncWriter``.

    Args:
        progress_bar_disable: forwarded to ``tqdm`` as ``disable``.
    """
    all_documents = Document.objects.all()
    fresh_index = index.open_index(recreate=True)

    with AsyncWriter(fresh_index) as writer:
        progress = tqdm.tqdm(all_documents, disable=progress_bar_disable)
        for current in progress:
            index.update_document(writer, current)
Exemplo n.º 6
0
def delete(doc_ids):
    """Delete documents from the database and from the search index.

    Args:
        doc_ids: collection of document ids. It is used once for the
            database filter and iterated again for the index removal.

    Returns:
        The string ``"OK"``.
    """
    Document.objects.filter(id__in=doc_ids).delete()

    # One writer handles every removal.
    with AsyncWriter(index.open_index()) as writer:
        for doc_id in doc_ids:
            index.remove_document_by_id(writer, doc_id)

    return "OK"
Exemplo n.º 7
0
def bulk_update_documents(document_ids):
    """Send ``post_save`` for the given documents and re-index them.

    Args:
        document_ids: ids of the ``Document`` rows to process.
    """
    affected = Document.objects.filter(id__in=document_ids)
    search_index = index.open_index()

    # Emit post_save (created=False) for each document first.
    for document in affected:
        post_save.send(Document, instance=document, created=False)

    # Then update all index entries through a single writer.
    with AsyncWriter(search_index) as writer:
        for document in affected:
            index.update_document(writer, document)
Exemplo n.º 8
0
    def get(self, request, format=None):
        """Run a full-text or "more like this" search and return one page.

        Query parameters read from ``request.query_params``:
            query: search string (optional).
            more_like: id of a document whose content is used for a
                similarity search (optional).
            page: 1-based page number. A missing, non-numeric or
                smaller-than-1 value falls back to page 1.

        Returns:
            A ``Response`` with the keys ``count``, ``page``,
            ``page_count``, ``corrected_query`` and ``results``. When
            neither ``query`` nor ``more_like`` is given, an empty result
            with zeroed counters is returned. An unknown or malformed
            ``more_like`` id, or any error raised while querying the index,
            yields an ``HttpResponseBadRequest``.
        """
        from documents import index

        params = request.query_params
        query = params.get('query')

        more_like_id = params.get('more_like')
        more_like_content = None
        if more_like_id is not None:
            # A missing or non-numeric id is a client error; answer with 400
            # instead of letting the lookup exception become a 500.
            try:
                more_like_content = Document.objects.get(
                    id=more_like_id).content
            except (Document.DoesNotExist, ValueError):
                return HttpResponseBadRequest(
                    "Invalid more_like document id")

        if not query and not more_like_id:
            return Response({
                'count': 0,
                'page': 0,
                'page_count': 0,
                'corrected_query': None,
                'results': []
            })

        try:
            page = int(params.get('page', 1))
        except (ValueError, TypeError):
            page = 1

        # Page numbers below 1 fall back to the first page.
        page = max(page, 1)

        ix = index.open_index()

        try:
            page_context = index.query_page(
                ix, page, query, more_like_id, more_like_content)
            with page_context as (result_page, corrected_query):
                return Response({
                    'count': len(result_page),
                    'page': result_page.pagenum,
                    'page_count': result_page.pagecount,
                    'corrected_query': corrected_query,
                    'results': list(map(self.add_infos_to_hit, result_page))
                })
        except Exception as e:
            # Any failure while querying is reported to the client as a
            # bad request carrying the exception text.
            return HttpResponseBadRequest(str(e))
Exemplo n.º 9
0
    def test_search_spelling_correction(self):
        """Check the ``corrected_query`` field of the search response.

        With 55 documents containing "Things" indexed, searching for
        "thing" must suggest "things", while searching for "things" must
        report no correction.
        """
        with AsyncWriter(index.open_index()) as writer:
            for number in range(1, 56):
                doc = Document.objects.create(
                    checksum=str(number - 1),
                    pk=number,
                    title=f"Document {number}",
                    content=f"Things document {number}",
                )
                index.update_document(writer, doc)

        misspelled = self.client.get("/api/search/?query=thing")
        suggestion = misspelled.data['corrected_query']
        self.assertEqual(suggestion, "things")

        exact = self.client.get("/api/search/?query=things")
        suggestion = exact.data['corrected_query']
        self.assertEqual(suggestion, None)
Exemplo n.º 10
0
    def test_search(self):
        """Check counts and paging metadata for several search terms.

        Three documents are indexed, then the search API is queried with
        terms matching three, one, two and zero of them.
        """
        invoice = Document.objects.create(
            title="invoice",
            content="the thing i bought at a shop and paid with bank account",
            checksum="A",
            pk=1,
        )
        august = Document.objects.create(
            title="bank statement 1",
            content="things i paid for in august",
            pk=2,
            checksum="B",
        )
        september = Document.objects.create(
            title="bank statement 3",
            content="things i paid for in september",
            pk=3,
            checksum="C",
        )

        with index.open_index(False).writer() as writer:
            # Note to future self: there is a reason we don't use a model
            # signal handler to update the index. Some operations edit many
            # documents at once (retagger, renamer), and we don't want to
            # open a writer for each of them but rather perform the entire
            # operation with one writer. That's why we can't open the writer
            # in a model on_save handler or something similar.
            for document in (invoice, august, september):
                index.update_document(writer, document)

        # (query term, count, page, page_count, number of results)
        expectations = (
            ("bank", 3, 1, 1, 3),
            ("september", 1, 1, 1, 1),
            ("statement", 2, 1, 1, 2),
            ("sfegdfg", 0, 0, 0, 0),
        )

        for term, count, page, page_count, result_count in expectations:
            response = self.client.get(f"/api/search/?query={term}")
            results = response.data['results']
            self.assertEqual(response.data['count'], count)
            self.assertEqual(response.data['page'], page)
            self.assertEqual(response.data['page_count'], page_count)
            self.assertEqual(len(results), result_count)
Exemplo n.º 11
0
    def get(self, request, format=None):
        """Return autocomplete suggestions for a search term.

        Query parameters read from ``request.query_params``:
            term: prefix to complete (required).
            limit: maximum number of suggestions; must be a positive
                integer. Defaults to 10 when absent.

        Returns:
            A ``Response`` wrapping the result of ``index.autocomplete``,
            or an ``HttpResponseBadRequest`` when ``term`` is missing or
            ``limit`` is not a positive integer.
        """
        if 'term' not in request.query_params:
            return HttpResponseBadRequest("Term required")
        term = request.query_params['term']

        limit = 10
        if 'limit' in request.query_params:
            # A non-numeric limit is a client error; answer with 400 rather
            # than letting the ValueError surface as a server error.
            try:
                limit = int(request.query_params['limit'])
            except ValueError:
                return HttpResponseBadRequest("Invalid limit")
            if limit <= 0:
                return HttpResponseBadRequest("Invalid limit")

        from documents import index

        ix = index.open_index()

        return Response(index.autocomplete(ix, term, limit))
Exemplo n.º 12
0
    def test_search_multi_page(self):
        """Check paging of a search that matches 55 documents.

        Pages 1-5 must hold 10 results each and page 6 the remaining 5,
        with no document id appearing twice. Requesting page 7 must return
        page 6 again.
        """
        with index.open_index(False).writer() as writer:
            for number in range(1, 56):
                doc = Document.objects.create(
                    checksum=str(number - 1),
                    pk=number,
                    title=f"Document {number}",
                    content="content",
                )
                index.update_document(writer, doc)

        # Ids collected so far, to verify that no document is returned twice
        # (which might happen if the paging is not working).
        seen_ids = []

        for page in range(1, 7):
            expected_len = 10 if page < 6 else 5
            response = self.client.get(
                f"/api/search/?query=content&page={page}")
            results = response.data['results']
            self.assertEqual(response.data['count'], 55)
            self.assertEqual(response.data['page'], page)
            self.assertEqual(response.data['page_count'], 6)
            self.assertEqual(len(results), expected_len)

            for result in results:
                self.assertNotIn(result['id'], seen_ids)
                seen_ids.append(result['id'])

        # A page past the end is answered with the last page.
        response = self.client.get("/api/search/?query=content&page=7")
        results = response.data['results']
        self.assertEqual(response.data['count'], 55)
        self.assertEqual(response.data['page'], 6)
        self.assertEqual(response.data['page_count'], 6)
        self.assertEqual(len(results), 5)
Exemplo n.º 13
0
 def __init__(self, *args, **kwargs):
     """Initialise the parent view, then open the search index as ``self.ix``."""
     super().__init__(*args, **kwargs)
     self.ix = index.open_index()
Exemplo n.º 14
0
def index_optimize():
    """Commit an ``AsyncWriter`` on the search index with ``optimize=True``.

    Nothing is written through the writer before the commit.
    """
    AsyncWriter(index.open_index()).commit(optimize=True)
Exemplo n.º 15
0
 def get_document_from_index(self, doc):
     """Look up ``doc`` in the search index by its id.

     Returns whatever the index searcher's ``document(id=...)`` call
     yields for ``doc.id``.
     """
     with index.open_index().searcher() as searcher:
         stored = searcher.document(id=doc.id)
     return stored
Exemplo n.º 16
0
def index_optimize():
    """Open the search index and call its ``optimize()`` method."""
    search_index = index.open_index()
    search_index.optimize()