示例#1
0
    def test_documents_by_text_should(self):
        """Check that a lone 'should' clause is enough to match a document.

        Background on the bool-query operators:
        https://stackoverflow.com/questions/28768277/elasticsearch-difference-between-must-and-should-bool-query

        * must: the clause (query) must appear in matching documents.
        * should: the clause (query) should appear in the matching document.
          In a boolean query with no must clauses, one or more should
          clauses must match a document; the minimum number of should
          clauses to match can be set with the minimum_should_match
          parameter.

        Only a 'should' clause is sent here, so we assume the soeur document
        *will* match: a count of 1 is expected and ALICE_FR_DOC_ID has to be
        among the returned document ids.
        """
        global envIdReadOnly, authorizationReadOnly
        targets = {CORPUS_ID: []}
        searcher = DocumentsByText(envIdReadOnly, authorizationReadOnly)
        french_sister_clause = {
            'operator': 'should',
            'search_mode': 'language',
            'language': 'fr-xx',
            'text': 'sœur',
        }
        # NOTE(review): the trailing 0 and 10 are presumably the result
        # offset and page size -- confirm against DocumentsByText.
        result = searcher.documents_by_text(
            targets, [french_sister_clause], 0, 10)
        total, hits = result
        self.assertEqual(1, total)
        returned_ids = [hit["id"] for hit in hits]
        self.assertIn(ALICE_FR_DOC_ID, returned_ids)
示例#2
0
 def test_documents_by_text_should_sister_french(self):
     """Search for 'sœur' in French with a single 'should' clause.

     Expects a reported count of 1 and ALICE_FR_DOC_ID among the ids of
     the returned documents.
     """
     global envIdReadOnly, authorizationReadOnly
     searcher = DocumentsByText(envIdReadOnly, authorizationReadOnly)
     targets = {CORPUS_ID: []}
     clauses = [
         {
             'operator': 'should',
             'search_mode': 'language',
             'language': 'fr-xx',
             'text': 'sœur',
         },
     ]
     # NOTE(review): the trailing 0 and 10 are presumably the result
     # offset and page size -- confirm against DocumentsByText.
     result = searcher.documents_by_text(targets, clauses, 0, 10)
     total, hits = result
     self.assertEqual(1, total)
     returned_ids = [hit["id"] for hit in hits]
     self.assertIn(ALICE_FR_DOC_ID, returned_ids)
示例#3
0
 def test_documents_by_text_english_books(self):
     """Search for 'books' in English with a single 'must' clause.

     Expects a reported count of 1 and ALICE_EN_DOC_ID among the ids of
     the returned documents.
     """
     global envIdReadOnly, authorizationReadOnly
     targets = {CORPUS_ID: []}
     searcher = DocumentsByText(envIdReadOnly, authorizationReadOnly)
     clauses = [
         {
             'operator': 'must',
             'search_mode': 'language',
             'language': 'en-xx',
             'text': 'books',
         },
     ]
     # NOTE(review): the trailing 0 and 10 are presumably the result
     # offset and page size -- confirm against DocumentsByText.
     result = searcher.documents_by_text(targets, clauses, 0, 10)
     total, hits = result
     self.assertEqual(1, total)
     returned_ids = [hit["id"] for hit in hits]
     self.assertIn(ALICE_EN_DOC_ID, returned_ids)
示例#4
0
 def test_documents_by_text_basic(self):
     """Search for 'alice' in 'basic' mode with an empty language.

     Expects a reported count of 2, with both ALICE_EN_DOC_ID and
     ALICE_FR_DOC_ID among the ids of the returned documents.
     """
     global envIdReadOnly, authorizationReadOnly
     searcher = DocumentsByText(envIdReadOnly, authorizationReadOnly)
     targets = {CORPUS_ID: []}
     clauses = [
         {
             'operator': 'must',
             'search_mode': 'basic',
             'language': '',
             'text': 'alice',
         },
     ]
     # NOTE(review): the trailing 0 and 10 are presumably the result
     # offset and page size -- confirm against DocumentsByText.
     result = searcher.documents_by_text(targets, clauses, 0, 10)
     total, hits = result
     self.assertEqual(2, total)
     returned_ids = [hit["id"] for hit in hits]
     # Same checks, same order: the English document first, then the French.
     for expected_id in (ALICE_EN_DOC_ID, ALICE_FR_DOC_ID):
         self.assertIn(expected_id, returned_ids)
示例#5
0
 def test_documents_by_text_must_and_must_not(self):
     """Combine a 'must' clause with a 'must_not' clause in one search.

     The 'must' clause looks for 'alice' in 'basic' mode; the 'must_not'
     clause names the French 'sœur'. Expects a reported count of 1 and
     ALICE_EN_DOC_ID among the ids of the returned documents.
     """
     global envIdReadOnly, authorizationReadOnly
     targets = {CORPUS_ID: []}
     searcher = DocumentsByText(envIdReadOnly, authorizationReadOnly)
     required_clause = {
         'operator': 'must',
         'search_mode': 'basic',
         'language': '',
         'text': 'alice',
     }
     excluded_clause = {
         'operator': 'must_not',
         'search_mode': 'language',
         'language': 'fr-xx',
         'text': 'sœur',
     }
     clauses = [required_clause, excluded_clause]
     # NOTE(review): the trailing 0 and 10 are presumably the result
     # offset and page size -- confirm against DocumentsByText.
     result = searcher.documents_by_text(targets, clauses, 0, 10)
     total, hits = result
     self.assertEqual(1, total)
     returned_ids = [hit["id"] for hit in hits]
     self.assertIn(ALICE_EN_DOC_ID, returned_ids)