def test_simple_different(self):
    # Two short documents that share their first two tokens but differ in
    # the last one must score strictly below 1.0.
    left = "abc def ghi1"
    right = "abc def xxx"
    self.assertLess(get_similarity(left, right), 1.0)
def test_spam_bounce(self):
    # Test a "normal" spam body against a bounce.
    # The fixtures are read inside context managers so both file handles
    # are closed deterministically (a bare open(...).read() leaks the
    # handle and triggers ResourceWarning under unittest).
    with open("main/tests/blacklist_emails/spam_2.txt") as spam_file:
        doc1 = spam_file.read()
    with open("main/tests/blacklist_emails/spam_1b.txt") as bounce_file:
        doc2 = bounce_file.read()
    similarity = get_similarity(doc1, doc2)
    # The two messages are expected to score below the 0.8 threshold.
    self.assertLess(similarity, 0.8)
def test_bounce(self):
    # Compare the spam_1a and spam_1b fixtures, which are expected to be
    # near-duplicates (presumably two variants of the same bounce message;
    # verify against the fixture contents).
    # The fixtures are read inside context managers so both file handles
    # are closed deterministically (a bare open(...).read() leaks the
    # handle and triggers ResourceWarning under unittest).
    with open("main/tests/blacklist_emails/spam_1a.txt") as first_file:
        doc1 = first_file.read()
    with open("main/tests/blacklist_emails/spam_1b.txt") as second_file:
        doc2 = second_file.read()
    similarity = get_similarity(doc1, doc2)
    # The pair must score at least 0.9.
    self.assertGreaterEqual(similarity, 0.9)
def test_repeat_text(self):
    # Test that repeated words do not affect the (cosine) similarity:
    # a document compared with three space-separated copies of itself
    # must still score at least 1.0.
    base = "abc def ghi1"
    tripled = " ".join([base, base, base])
    score = get_similarity(base, tripled)
    # NOTE(review): this is an exact >= 1.0 comparison on what is presumably
    # a float; confirm get_similarity cannot return a value marginally
    # below 1.0 due to rounding.
    self.assertGreaterEqual(score, 1.0)
def test_simple_name(self):
    # A document compared with itself must score at least 1.0.
    text = "abc def ghi1"
    score = get_similarity(text, text)
    self.assertGreaterEqual(score, 1.0)