Пример #1
0
    def test_pretokenized_words_frequencies(self):
        model = TfDocumentModel(("wC", "wC", "WC", "wA", "WB", "wB"))

        self.assertEqual(model.term_frequency("wa"), 1)
        self.assertEqual(model.term_frequency("wb"), 2)
        self.assertEqual(model.term_frequency("wc"), 3)
        self.assertEqual(model.term_frequency("wd"), 0)

        self.assertEqual(model.most_frequent_terms(), ("wc", "wb", "wa"))
Пример #2
0
    def test_pretokenized_words_frequencies(self):
        model = TfDocumentModel(("wC", "wC", "WC", "wA", "WB", "wB"))

        self.assertEqual(model.term_frequency("wa"), 1)
        self.assertEqual(model.term_frequency("wb"), 2)
        self.assertEqual(model.term_frequency("wc"), 3)
        self.assertEqual(model.term_frequency("wd"), 0)

        self.assertEqual(model.most_frequent_terms(), ("wc", "wb", "wa"))
Пример #3
0
def test_pretokenized_words_frequencies():
    model = TfDocumentModel(("wC", "wC", "WC", "wA", "WB", "wB"))

    assert model.term_frequency("wa") == 1
    assert model.term_frequency("wb") == 2
    assert model.term_frequency("wc") == 3
    assert model.term_frequency("wd") == 0

    assert model.most_frequent_terms() == ("wc", "wb", "wa")
Пример #4
0
def test_pretokenized_words_frequencies():
    model = TfDocumentModel(("wC", "wC", "WC", "wA", "WB", "wB"))

    assert model.term_frequency("wa") == 1
    assert model.term_frequency("wb") == 2
    assert model.term_frequency("wc") == 3
    assert model.term_frequency("wd") == 0

    assert model.most_frequent_terms() == ("wc", "wb", "wa")
Пример #5
0
    def test_term_frequency(self):
        tokenizer = Tokenizer("english")
        text = "wA wB wC wA wA wC wD wCwB"
        model = TfDocumentModel(text, tokenizer)

        self.assertEqual(model.term_frequency("wa"), 3)
        self.assertEqual(model.term_frequency("wb"), 1)
        self.assertEqual(model.term_frequency("wc"), 2)
        self.assertEqual(model.term_frequency("wd"), 1)
        self.assertEqual(model.term_frequency("wcwb"), 1)
        self.assertEqual(model.term_frequency("we"), 0)
        self.assertEqual(model.term_frequency("missing"), 0)
Пример #6
0
    def test_term_frequency(self):
        tokenizer = Tokenizer("english")
        text = "wA wB wC wA wA wC wD wCwB"
        model = TfDocumentModel(text, tokenizer)

        self.assertEqual(model.term_frequency("wa"), 3)
        self.assertEqual(model.term_frequency("wb"), 1)
        self.assertEqual(model.term_frequency("wc"), 2)
        self.assertEqual(model.term_frequency("wd"), 1)
        self.assertEqual(model.term_frequency("wcwb"), 1)
        self.assertEqual(model.term_frequency("we"), 0)
        self.assertEqual(model.term_frequency("missing"), 0)
Пример #7
0
def test_term_frequency():
    tokenizer = Tokenizer("english")
    text = "wA wB wC wA wA wC wD wCwB"
    model = TfDocumentModel(text, tokenizer)

    assert model.term_frequency("wa") == 3
    assert model.term_frequency("wb") == 1
    assert model.term_frequency("wc") == 2
    assert model.term_frequency("wd") == 1
    assert model.term_frequency("wcwb") == 1
    assert model.term_frequency("we") == 0
    assert model.term_frequency("missing") == 0
Пример #8
0
def test_term_frequency():
    tokenizer = Tokenizer("english")
    text = "wA wB wC wA wA wC wD wCwB"
    model = TfDocumentModel(text, tokenizer)

    assert model.term_frequency("wa") == 3
    assert model.term_frequency("wb") == 1
    assert model.term_frequency("wc") == 2
    assert model.term_frequency("wd") == 1
    assert model.term_frequency("wcwb") == 1
    assert model.term_frequency("we") == 0
    assert model.term_frequency("missing") == 0