# Example #1
# 0
class IndexerTest(unittest.TestCase):
    """Unit tests checking how ``Indexer`` interacts with its collaborators.

    Every test wires the indexer to an ``IndexStoreMock`` and a
    ``TokenizerMock`` and then inspects the calls recorded by those mocks.
    """

    def setUp(self):
        """Create fresh mocks and an indexer built on top of them."""
        self.store_mock = IndexStoreMock()
        self.tokenizer_mock = TokenizerMock()
        self.indexer = Indexer(self.store_mock, self.tokenizer_mock)

    def test_term_document_frequency(self):
        """The store is queried exactly once with ``(document, term)``."""
        # Arrange
        term = "foo"
        document = uuid.uuid4()

        # Act
        self.indexer.term_document_frequency(document, term)

        # Assert
        self.assertEqual(1, self.store_mock.num_method_calls("term_document_frequency"))
        arguments = self.store_mock.get_arguments("term_document_frequency")
        self.assertEqual(document, arguments[0])
        self.assertEqual(term, arguments[1])

    def test_document_frequency_normalized(self):
        """A document frequency of 22 over 100 documents yields 0.22."""
        # Arrange
        term = "foo"
        document_frequency = 22
        num_documents = 100
        self.store_mock.set_document_frequency(document_frequency)
        self.store_mock.set_num_documents(num_documents)

        # Act
        result = self.indexer.document_frequency_normalized(term)

        # Assert
        self.assertEqual(1, self.store_mock.num_method_calls("document_frequency"))
        document_frequency_args = self.store_mock.get_arguments("document_frequency")
        self.assertEqual(term, document_frequency_args[0])

        self.assertEqual(1, self.store_mock.num_method_calls("num_documents"))

        # The result is a float: compare with a tolerance so the test does not
        # depend on the exact arithmetic the indexer uses to get the value.
        self.assertAlmostEqual(0.22, result)

    def test_index_empty_text(self):
        """When the tokenizer yields no tokens, nothing is added to the store."""
        # Arrange
        document = uuid.uuid4()
        text = ""
        self.tokenizer_mock.set_tokens([])

        # Act
        self.indexer.index(text, document)

        # Assert
        self.assertEqual(1, self.tokenizer_mock.num_method_calls("tokenize"))
        tokenize_arguments = self.tokenizer_mock.get_arguments("tokenize")
        self.assertEqual(text, tokenize_arguments[0])

        self.assertFalse(self.store_mock.was_called("add"))

    def test_index_one_token(self):
        """A single token results in one ``add(document, token)`` call."""
        # Arrange
        document = uuid.uuid4()
        text = "foo"
        self.tokenizer_mock.set_tokens([text])

        # Act
        self.indexer.index(text, document)

        # Assert
        self.assertEqual(1, self.tokenizer_mock.num_method_calls("tokenize"))
        tokenize_arguments = self.tokenizer_mock.get_arguments("tokenize")
        self.assertEqual(text, tokenize_arguments[0])

        self.assertEqual(1, self.store_mock.num_method_calls("add"))
        add_arguments = self.store_mock.get_arguments("add")
        self.assertEqual(document, add_arguments[0])
        self.assertEqual(text, add_arguments[1])

    def test_index_two_tokens(self):
        """Two tokens result in two ``add`` calls, in token order."""
        # Arrange
        document = uuid.uuid4()
        tokens = ["foo", "bar"]
        text = " ".join(tokens)
        self.tokenizer_mock.set_tokens(tokens)

        # Act
        self.indexer.index(text, document)

        # Assert
        self.assertEqual(1, self.tokenizer_mock.num_method_calls("tokenize"))
        tokenize_arguments = self.tokenizer_mock.get_arguments("tokenize")
        self.assertEqual(text, tokenize_arguments[0])

        self.assertEqual(len(tokens), self.store_mock.num_method_calls("add"))

        # The mock is asked for recorded calls at positions 1 and 2
        # (presumably 1-based call numbers -- confirm against IndexStoreMock).
        for call_number, token in enumerate(tokens, start=1):
            with self.subTest(call_number=call_number, token=token):
                add_arguments = self.store_mock.get_arguments("add", call_number)
                self.assertEqual(document, add_arguments[0])
                self.assertEqual(token, add_arguments[1])

    def test_get_posting_list(self):
        """The store's ``posting_list`` is called exactly once with the term."""
        # Arrange
        term = "foo"

        # Act
        self.indexer.get_posting_list(term)

        # Assert
        self.assertEqual(1, self.store_mock.num_method_calls("posting_list"))
        arguments = self.store_mock.get_arguments("posting_list")
        self.assertEqual(term, arguments[0])

    def test_get_terms(self):
        """The terms configured on the store mock are returned for the document."""
        # Arrange
        terms = {"foo", "bar"}
        document = uuid.uuid4()

        self.store_mock.set_terms(terms)

        # Act
        result = self.indexer.get_terms(document)

        # Assert
        self.assertEqual(1, self.store_mock.num_method_calls("get_terms"))
        arguments = self.store_mock.get_arguments("get_terms")
        self.assertEqual(document, arguments[0])
        self.assertEqual(terms, result)