class IndexerTest(unittest.TestCase):
    """Unit tests for Indexer.

    Verifies that Indexer delegates to its index-store and tokenizer
    collaborators with the expected arguments and call counts, using
    hand-rolled mock objects (IndexStoreMock, TokenizerMock).
    """

    def setUp(self):
        # Fresh mocks per test so recorded calls do not leak between tests.
        self.store_mock = IndexStoreMock()
        self.tokenizer_mock = TokenizerMock()
        self.indexer = Indexer(self.store_mock, self.tokenizer_mock)

    def test_term_document_frequency(self):
        """term_document_frequency forwards (document, term) to the store."""
        # Arrange
        term = "foo"
        document = uuid.uuid4()

        # Act
        self.indexer.term_document_frequency(document, term)

        # Assert
        self.assertEqual(
            1, self.store_mock.num_method_calls("term_document_frequency")
        )
        arguments = self.store_mock.get_arguments("term_document_frequency")
        self.assertEqual(document, arguments[0])
        self.assertEqual(term, arguments[1])

    def test_document_frequency_normalized(self):
        """document_frequency_normalized returns document_frequency / num_documents."""
        # Arrange
        term = "foo"
        document_frequency = 22
        num_documents = 100
        self.store_mock.set_document_frequency(document_frequency)
        self.store_mock.set_num_documents(num_documents)

        # Act
        result = self.indexer.document_frequency_normalized(term)

        # Assert
        self.assertEqual(1, self.store_mock.num_method_calls("document_frequency"))
        document_frequency_args = self.store_mock.get_arguments("document_frequency")
        self.assertEqual(term, document_frequency_args[0])
        self.assertEqual(1, self.store_mock.num_method_calls("num_documents"))
        # The result is a computed float ratio; exact float equality is
        # fragile, so compare approximately (expected first, per the
        # convention used throughout this class).
        self.assertAlmostEqual(0.22, result)

    def test_index_empty_text(self):
        """index() on empty text tokenizes but never adds to the store."""
        # Arrange
        document = uuid.uuid4()
        text = ""
        self.tokenizer_mock.set_tokens([])

        # Act
        self.indexer.index(text, document)

        # Assert
        self.assertEqual(1, self.tokenizer_mock.num_method_calls("tokenize"))
        tokenize_arguments = self.tokenizer_mock.get_arguments("tokenize")
        self.assertEqual(text, tokenize_arguments[0])
        self.assertFalse(self.store_mock.was_called("add"))

    def test_index_one_token(self):
        """index() with a single token adds (document, token) to the store once."""
        # Arrange
        document = uuid.uuid4()
        text = "foo"
        self.tokenizer_mock.set_tokens([text])

        # Act
        self.indexer.index(text, document)

        # Assert
        self.assertEqual(1, self.tokenizer_mock.num_method_calls("tokenize"))
        tokenize_arguments = self.tokenizer_mock.get_arguments("tokenize")
        self.assertEqual(text, tokenize_arguments[0])
        self.assertEqual(1, self.store_mock.num_method_calls("add"))
        add_arguments = self.store_mock.get_arguments("add")
        self.assertEqual(document, add_arguments[0])
        self.assertEqual(text, add_arguments[1])

    def test_index_two_tokens(self):
        """index() with two tokens adds each (document, token) pair in order."""
        # Arrange
        document = uuid.uuid4()
        tokens = ["foo", "bar"]
        text = " ".join(tokens)
        self.tokenizer_mock.set_tokens(tokens)

        # Act
        self.indexer.index(text, document)

        # Assert
        self.assertEqual(1, self.tokenizer_mock.num_method_calls("tokenize"))
        tokenize_arguments = self.tokenizer_mock.get_arguments("tokenize")
        self.assertEqual(text, tokenize_arguments[0])
        self.assertEqual(2, self.store_mock.num_method_calls("add"))
        # get_arguments(name, n) returns the arguments of the n-th call
        # (1-based), letting us check per-call argument tuples.
        add_arguments1 = self.store_mock.get_arguments("add", 1)
        self.assertEqual(document, add_arguments1[0])
        self.assertEqual(tokens[0], add_arguments1[1])
        add_arguments2 = self.store_mock.get_arguments("add", 2)
        self.assertEqual(document, add_arguments2[0])
        self.assertEqual(tokens[1], add_arguments2[1])

    def test_get_posting_list(self):
        """get_posting_list forwards the term to the store's posting_list."""
        # Arrange
        term = "foo"

        # Act
        self.indexer.get_posting_list(term)

        # Assert
        self.assertEqual(1, self.store_mock.num_method_calls("posting_list"))
        arguments = self.store_mock.get_arguments("posting_list")
        self.assertEqual(term, arguments[0])

    def test_get_terms(self):
        """get_terms forwards the document to the store and returns its terms."""
        # Arrange
        terms = {"foo", "bar"}
        document = uuid.uuid4()
        self.store_mock.set_terms(terms)

        # Act
        result = self.indexer.get_terms(document)

        # Assert
        self.assertEqual(1, self.store_mock.num_method_calls("get_terms"))
        arguments = self.store_mock.get_arguments("get_terms")
        self.assertEqual(document, arguments[0])
        self.assertEqual(terms, result)