def test_pretokenized_words_frequencies(self):
    """Check term counts for a model built from an already-split word tuple.

    The input words differ only in letter case; the test expects lookups by
    the lowercase term to count every case variant, an absent term to report
    zero, and ``most_frequent_terms()`` to list terms from the highest
    expected count to the lowest.
    """
    words = ("wC", "wC", "WC", "wA", "WB", "wB")
    document = TfDocumentModel(words)

    # (term, expected frequency) pairs, checked in this order.
    expected_counts = (
        ("wa", 1),
        ("wb", 2),
        ("wc", 3),
        ("wd", 0),
    )
    for term, count in expected_counts:
        self.assertEqual(document.term_frequency(term), count)

    self.assertEqual(document.most_frequent_terms(), ("wc", "wb", "wa"))
def test_pretokenized_words_frequencies(self):
    """Check term counts for a model built from an already-split word tuple.

    The input words differ only in letter case; the test expects lookups by
    the lowercase term to count every case variant, an absent term to report
    zero, and ``most_frequent_terms()`` to list terms from the highest
    expected count to the lowest.
    """
    words = ("wC", "wC", "WC", "wA", "WB", "wB")
    document = TfDocumentModel(words)

    # (term, expected frequency) pairs, checked in this order.
    expected_counts = (
        ("wa", 1),
        ("wb", 2),
        ("wc", 3),
        ("wd", 0),
    )
    for term, count in expected_counts:
        self.assertEqual(document.term_frequency(term), count)

    self.assertEqual(document.most_frequent_terms(), ("wc", "wb", "wa"))
def test_pretokenized_words_frequencies():
    """Check term counts for a model built from an already-split word tuple.

    The input words differ only in letter case; the test expects lookups by
    the lowercase term to count every case variant, an absent term to report
    zero, and ``most_frequent_terms()`` to list terms from the highest
    expected count to the lowest.
    """
    words = ("wC", "wC", "WC", "wA", "WB", "wB")
    document = TfDocumentModel(words)

    # (term, expected frequency) pairs, checked in this order.
    expected_counts = (
        ("wa", 1),
        ("wb", 2),
        ("wc", 3),
        ("wd", 0),
    )
    for term, count in expected_counts:
        assert document.term_frequency(term) == count

    assert document.most_frequent_terms() == ("wc", "wb", "wa")
def test_pretokenized_words_frequencies():
    """Check term counts for a model built from an already-split word tuple.

    The input words differ only in letter case; the test expects lookups by
    the lowercase term to count every case variant, an absent term to report
    zero, and ``most_frequent_terms()`` to list terms from the highest
    expected count to the lowest.
    """
    words = ("wC", "wC", "WC", "wA", "WB", "wB")
    document = TfDocumentModel(words)

    # (term, expected frequency) pairs, checked in this order.
    expected_counts = (
        ("wa", 1),
        ("wb", 2),
        ("wc", 3),
        ("wd", 0),
    )
    for term, count in expected_counts:
        assert document.term_frequency(term) == count

    assert document.most_frequent_terms() == ("wc", "wb", "wa")
def test_term_frequency(self):
    """Check per-term counts for a model built from raw text and a tokenizer.

    The text mixes letter case and contains the fused token ``wCwB``; the
    test expects lowercase lookups to match, the fused token to be counted as
    its own term, and terms absent from the text to report zero.
    """
    raw_text = "wA wB wC wA wA wC wD wCwB"
    document = TfDocumentModel(raw_text, Tokenizer("english"))

    # (term, expected frequency) pairs, checked in this order.
    expected_counts = (
        ("wa", 3),
        ("wb", 1),
        ("wc", 2),
        ("wd", 1),
        ("wcwb", 1),
        ("we", 0),
        ("missing", 0),
    )
    for term, count in expected_counts:
        self.assertEqual(document.term_frequency(term), count)
def test_term_frequency(self):
    """Check per-term counts for a model built from raw text and a tokenizer.

    The text mixes letter case and contains the fused token ``wCwB``; the
    test expects lowercase lookups to match, the fused token to be counted as
    its own term, and terms absent from the text to report zero.
    """
    raw_text = "wA wB wC wA wA wC wD wCwB"
    document = TfDocumentModel(raw_text, Tokenizer("english"))

    # (term, expected frequency) pairs, checked in this order.
    expected_counts = (
        ("wa", 3),
        ("wb", 1),
        ("wc", 2),
        ("wd", 1),
        ("wcwb", 1),
        ("we", 0),
        ("missing", 0),
    )
    for term, count in expected_counts:
        self.assertEqual(document.term_frequency(term), count)
def test_term_frequency():
    """Check per-term counts for a model built from raw text and a tokenizer.

    The text mixes letter case and contains the fused token ``wCwB``; the
    test expects lowercase lookups to match, the fused token to be counted as
    its own term, and terms absent from the text to report zero.
    """
    raw_text = "wA wB wC wA wA wC wD wCwB"
    document = TfDocumentModel(raw_text, Tokenizer("english"))

    # (term, expected frequency) pairs, checked in this order.
    expected_counts = (
        ("wa", 3),
        ("wb", 1),
        ("wc", 2),
        ("wd", 1),
        ("wcwb", 1),
        ("we", 0),
        ("missing", 0),
    )
    for term, count in expected_counts:
        assert document.term_frequency(term) == count
def test_term_frequency():
    """Check per-term counts for a model built from raw text and a tokenizer.

    The text mixes letter case and contains the fused token ``wCwB``; the
    test expects lowercase lookups to match, the fused token to be counted as
    its own term, and terms absent from the text to report zero.
    """
    raw_text = "wA wB wC wA wA wC wD wCwB"
    document = TfDocumentModel(raw_text, Tokenizer("english"))

    # (term, expected frequency) pairs, checked in this order.
    expected_counts = (
        ("wa", 3),
        ("wb", 1),
        ("wc", 2),
        ("wd", 1),
        ("wcwb", 1),
        ("we", 0),
        ("missing", 0),
    )
    for term, count in expected_counts:
        assert document.term_frequency(term) == count