Exemplo n.º 1
0
def test_get_term_freqs_sublinear(vectorizer_and_dtm, lamb_and_child_idxs):
    """Verify the ``'sqrt'`` and ``'log'`` term-frequency scalings.

    Both sublinear variants are compared element-wise against transforms of
    the ``'linear'`` result for the same document-term matrix, and the
    ``'log'`` variant is additionally pinned to fixed expected values.

    Args:
        vectorizer_and_dtm: Fixture unpacking to a pair whose second item is
            the document-term matrix; the first item is not used here.
        lamb_and_child_idxs: Fixture unpacking to the two indices used to
            look up individual entries of the frequency arrays.
    """
    _vectorizer, dtm = vectorizer_and_dtm
    lamb_pos, child_pos = lamb_and_child_idxs

    def near(expected):
        # Every pinned value below shares the same absolute tolerance.
        return pytest.approx(expected, abs=1e-3)

    linear, root, logged = [
        vsm.get_term_freqs(dtm, type_=scaling)
        for scaling in ("linear", "sqrt", "log")
    ]

    # All three results must have one entry per column of the matrix.
    n_terms = dtm.shape[1]
    assert all(len(freqs) == n_terms for freqs in (linear, root, logged))

    assert logged.max() == near(2.60943)
    assert logged.min() == near(1.0)
    assert logged[lamb_pos] == near(2.60943)
    assert logged[child_pos] == near(1.69314)

    # Exact (not approximate) agreement with the transforms of the linear
    # frequencies is required here.
    assert np.all(root == np.sqrt(linear))
    assert np.all(logged == np.log(linear) + 1.0)
Exemplo n.º 2
0
 def test_get_term_freqs_normalized(self):
     """Check ``get_term_freqs(..., normalized=True)`` against fixed values.

     Uses ``self.doc_term_matrix`` as input and ``self.idx_lamb`` /
     ``self.idx_child`` to look up two individual entries of the result.
     """
     dtm = self.doc_term_matrix
     freqs = vsm.get_term_freqs(dtm, normalized=True)

     # One frequency per column of the document-term matrix.
     self.assertEqual(len(freqs), dtm.shape[1])

     # Extremes of the normalized frequencies.
     self.assertAlmostEqual(freqs.max(), 0.19230, places=4)
     self.assertAlmostEqual(freqs.min(), 0.03846, places=4)

     # Entries at the two stored indices.
     lamb_freq = freqs[self.idx_lamb]
     self.assertAlmostEqual(lamb_freq, 0.1923, places=4)
     child_freq = freqs[self.idx_child]
     self.assertAlmostEqual(child_freq, 0.07692, places=4)
Exemplo n.º 3
0
 def test_get_term_freqs(self):
     """Check ``get_term_freqs(..., normalized=False)`` against fixed counts.

     Uses ``self.doc_term_matrix`` as input and ``self.idx_lamb`` /
     ``self.idx_child`` to look up two individual entries of the result.
     """
     dtm = self.doc_term_matrix
     counts = vsm.get_term_freqs(dtm, normalized=False)

     # One count per column of the document-term matrix.
     self.assertEqual(len(counts), dtm.shape[1])

     # Smallest and largest values in the result.
     self.assertEqual(counts.min(), 1)
     self.assertEqual(counts.max(), 5)

     # Entries at the two stored indices.
     lamb_count = counts[self.idx_lamb]
     self.assertEqual(lamb_count, 5)
     child_count = counts[self.idx_child]
     self.assertEqual(child_count, 2)
Exemplo n.º 4
0
 def test_get_term_freqs_normalized(self):
     """Check ``get_term_freqs(..., normalized=True)`` against fixed values.

     Uses ``self.doc_term_matrix`` as input and ``self.idx_lamb`` /
     ``self.idx_child`` to look up two individual entries of the result.
     """
     dtm = self.doc_term_matrix
     freqs = vsm.get_term_freqs(dtm, normalized=True)

     # One frequency per column of the document-term matrix.
     self.assertEqual(len(freqs), dtm.shape[1])

     # Extremes of the normalized frequencies.
     self.assertAlmostEqual(freqs.max(), 0.19230, places=4)
     self.assertAlmostEqual(freqs.min(), 0.03846, places=4)

     # Entries at the two stored indices.
     lamb_freq = freqs[self.idx_lamb]
     self.assertAlmostEqual(lamb_freq, 0.1923, places=4)
     child_freq = freqs[self.idx_child]
     self.assertAlmostEqual(child_freq, 0.07692, places=4)
Exemplo n.º 5
0
 def test_get_term_freqs(self):
     """Check ``get_term_freqs(..., normalized=False)`` against fixed counts.

     Uses ``self.doc_term_matrix`` as input and ``self.idx_lamb`` /
     ``self.idx_child`` to look up two individual entries of the result.
     """
     dtm = self.doc_term_matrix
     counts = vsm.get_term_freqs(dtm, normalized=False)

     # One count per column of the document-term matrix.
     self.assertEqual(len(counts), dtm.shape[1])

     # Smallest and largest values in the result.
     self.assertEqual(counts.min(), 1)
     self.assertEqual(counts.max(), 5)

     # Entries at the two stored indices.
     lamb_count = counts[self.idx_lamb]
     self.assertEqual(lamb_count, 5)
     child_count = counts[self.idx_child]
     self.assertEqual(child_count, 2)
Exemplo n.º 6
0
def test_get_term_freqs(vectorizer_and_dtm, lamb_and_child_idxs):
    """Check ``get_term_freqs(..., type_='linear')`` against fixed counts.

    Args:
        vectorizer_and_dtm: Fixture unpacking to a pair whose second item is
            the document-term matrix; the first item is not used here.
        lamb_and_child_idxs: Fixture unpacking to the two indices used to
            look up individual entries of the result.
    """
    _vectorizer, dtm = vectorizer_and_dtm
    lamb_pos, child_pos = lamb_and_child_idxs

    counts = vsm.get_term_freqs(dtm, type_="linear")

    # One count per column of the document-term matrix.
    n_terms = dtm.shape[1]
    assert len(counts) == n_terms

    # Smallest and largest values in the result.
    assert counts.min() == 1
    assert counts.max() == 5

    # Entries at the two fixture-provided indices.
    assert counts[lamb_pos] == 5
    assert counts[child_pos] == 2
Exemplo n.º 7
0
def test_get_term_freqs_normalized(vectorizer_and_dtm, lamb_and_child_idxs):
    """Check ``get_term_freqs(..., normalized=True)`` against fixed values.

    Args:
        vectorizer_and_dtm: Fixture unpacking to a pair whose second item is
            the document-term matrix; the first item is not used here.
        lamb_and_child_idxs: Fixture unpacking to the two indices used to
            look up individual entries of the result.
    """
    _vectorizer, dtm = vectorizer_and_dtm
    lamb_pos, child_pos = lamb_and_child_idxs

    def near(expected):
        # Every pinned value below shares the same absolute tolerance.
        return pytest.approx(expected, abs=1e-3)

    freqs = vsm.get_term_freqs(dtm, normalized=True)

    # One frequency per column of the document-term matrix.
    n_terms = dtm.shape[1]
    assert len(freqs) == n_terms

    # Extremes of the normalized frequencies.
    assert freqs.max() == near(0.15625)
    assert freqs.min() == near(0.03125)

    # Entries at the two fixture-provided indices.
    assert freqs[lamb_pos] == near(0.15625)
    assert freqs[child_pos] == near(0.06250)
Exemplo n.º 8
0
def test_get_term_freqs_exception():
    """Expect ``ValueError`` from ``get_term_freqs`` for a 1x1 CSR matrix.

    The matrix is built from ``coo_matrix((1, 1))``, i.e. from a shape only,
    with no values supplied.
    """
    shape_only_dtm = coo_matrix((1, 1)).tocsr()
    with pytest.raises(ValueError):
        vsm.get_term_freqs(shape_only_dtm)