def test_documents_by_text_should(self):
    """Check that a lone 'should' clause is enough to match a document.

    Background:
    https://stackoverflow.com/questions/28768277/elasticsearch-difference-between-must-and-should-bool-query

    Must: the clause (query) must appear in matching documents.
    Should: the clause (query) should appear in the matching document.
    In a boolean query with no must clauses, one or more should clauses
    must match a document. The minimum number of should clauses to match
    can be set using the minimum_should_match parameter.

    The query built here has no must clause, so the document containing
    'sœur' is assumed to match: the test expects a count of 1 and
    ALICE_FR_DOC_ID among the returned documents.
    """
    # envIdReadOnly / authorizationReadOnly are module-level names that are
    # only read here, so no ``global`` declaration is required.
    targets = {CORPUS_ID: []}
    searcher = DocumentsByText(envIdReadOnly, authorizationReadOnly)
    should_clause = {
        'operator': 'should',
        'search_mode': 'language',
        'language': 'fr-xx',
        'text': 'sœur',
    }

    # 0, 10: presumably offset and page size -- confirm against DocumentsByText.
    count, documents = searcher.documents_by_text(
        targets, [should_clause], 0, 10)

    self.assertEqual(1, count)
    found_ids = [doc["id"] for doc in documents]
    self.assertIn(ALICE_FR_DOC_ID, found_ids)
def test_documents_by_text_should_sister_french(self):
    """Search for the French word 'sœur' using a single 'should' clause.

    Runs a language-mode query ('fr-xx') against CORPUS_ID and expects a
    count of 1 with ALICE_FR_DOC_ID among the returned documents.
    """
    # envIdReadOnly / authorizationReadOnly are module-level names that are
    # only read here, so no ``global`` declaration is required.
    searcher = DocumentsByText(envIdReadOnly, authorizationReadOnly)
    targets = {CORPUS_ID: []}
    clauses = [
        {
            'operator': 'should',
            'search_mode': 'language',
            'language': 'fr-xx',
            'text': 'sœur',
        },
    ]

    # 0, 10: presumably offset and page size -- confirm against DocumentsByText.
    count, documents = searcher.documents_by_text(targets, clauses, 0, 10)

    self.assertEqual(1, count)
    found_ids = [doc["id"] for doc in documents]
    self.assertIn(ALICE_FR_DOC_ID, found_ids)
def test_documents_by_text_english_books(self):
    """Search for the English word 'books' using a single 'must' clause.

    Runs a language-mode query ('en-xx') against CORPUS_ID and expects a
    count of 1 with ALICE_EN_DOC_ID among the returned documents.
    """
    # envIdReadOnly / authorizationReadOnly are module-level names that are
    # only read here, so no ``global`` declaration is required.
    targets = {CORPUS_ID: []}
    searcher = DocumentsByText(envIdReadOnly, authorizationReadOnly)
    clauses = [
        {
            'operator': 'must',
            'search_mode': 'language',
            'language': 'en-xx',
            'text': 'books',
        },
    ]

    # 0, 10: presumably offset and page size -- confirm against DocumentsByText.
    count, documents = searcher.documents_by_text(targets, clauses, 0, 10)

    self.assertEqual(1, count)
    found_ids = [doc["id"] for doc in documents]
    self.assertIn(ALICE_EN_DOC_ID, found_ids)
def test_documents_by_text_basic(self):
    """Search for 'alice' in 'basic' mode with an empty language.

    Expects a count of 2 and both ALICE_EN_DOC_ID and ALICE_FR_DOC_ID among
    the returned documents.
    """
    # envIdReadOnly / authorizationReadOnly are module-level names that are
    # only read here, so no ``global`` declaration is required.
    searcher = DocumentsByText(envIdReadOnly, authorizationReadOnly)
    targets = {CORPUS_ID: []}
    clauses = [
        {
            'operator': 'must',
            'search_mode': 'basic',
            'language': '',
            'text': 'alice',
        },
    ]

    # 0, 10: presumably offset and page size -- confirm against DocumentsByText.
    count, documents = searcher.documents_by_text(targets, clauses, 0, 10)

    self.assertEqual(2, count)
    found_ids = [doc["id"] for doc in documents]
    self.assertIn(ALICE_EN_DOC_ID, found_ids)
    self.assertIn(ALICE_FR_DOC_ID, found_ids)
def test_documents_by_text_must_and_must_not(self):
    """Combine a 'must' clause with a 'must_not' clause.

    The 'must' clause searches 'alice' in basic mode; the 'must_not' clause
    searches the French word 'sœur' in language mode ('fr-xx'). The test
    expects a count of 1 with ALICE_EN_DOC_ID among the returned documents.
    """
    # envIdReadOnly / authorizationReadOnly are module-level names that are
    # only read here, so no ``global`` declaration is required.
    targets = {CORPUS_ID: []}
    searcher = DocumentsByText(envIdReadOnly, authorizationReadOnly)
    required = {
        'operator': 'must',
        'search_mode': 'basic',
        'language': '',
        'text': 'alice',
    }
    excluded = {
        'operator': 'must_not',
        'search_mode': 'language',
        'language': 'fr-xx',
        'text': 'sœur',
    }

    # 0, 10: presumably offset and page size -- confirm against DocumentsByText.
    count, documents = searcher.documents_by_text(
        targets, [required, excluded], 0, 10)

    self.assertEqual(1, count)
    found_ids = [doc["id"] for doc in documents]
    self.assertIn(ALICE_EN_DOC_ID, found_ids)