Пример #1
0
	def test_cosine(self):
		"""Check proximity.cosine against three reference vector pairs."""
		base = [1, 2, 3]
		# Each entry is (second vector, expected cosine). The first entry reuses
		# the very same list object; the second has a zero dot product with base.
		cases = (
			(base, 1),
			([-4, 8, -4], 0),
			([5, 6, 1], 0.678844233302),
		)
		for other, want in cases:
			self.assertAlmostEqual(want, proximity.cosine(base, other))
Пример #2
0
# Load the two downloaded Wikipedia pages through file_to_dict.
NPH_words = file_to_dict('input_files/wiki_NPH.txt')
NPC_words = file_to_dict('input_files/wiki_NPC.txt')

# compute cosine similarity
# NOTE: every print below takes exactly one parenthesised argument, so the
# output is identical under the Python 2 print statement and Python 3 print().
print('This program uses cosine similarity metric to compare text documents:')
print('')
print("Here's how the documents were generated:")
print("doc1.txt : seq 1.0 1.0 100.0 > doc1.txt")
print("doc2.txt : seq 0.5 99.5 > doc2.txt")
print("doc3.txt : seq 1 0.5 50 > doc3.txt")
print("wiki_NPH.txt : wget http://en.wikipedia.org/wiki/NP-hard -O wiki_NPH.txt")
# Fixed: the NP-complete page is saved to wiki_NPC.txt (the message used to
# repeat wiki_NPH.txt, contradicting the file loaded above).
print("wiki_NPC.txt : wget http://en.wikipedia.org/wiki/NP-complete -O wiki_NPC.txt")
print('')
print("Document similarities:")

_separator = "======================================================================"

# (left name, right name, left words, right words, expected similarity text).
# The expected text is None when no reference value is reported for the pair.
_comparisons = (
    ("doc1.txt", "doc2.txt", f1_words, f2_words, "0.0"),
    ("doc2.txt", "doc3.txt", f2_words, f3_words, "0.5"),
    ("doc1.txt", "doc3.txt", f1_words, f3_words, "0.5"),
    ("doc2.txt", "doc2.txt", f2_words, f2_words, "1.0"),
    ("wiki_NPH.txt", "wiki_NPC.txt", NPH_words, NPC_words, None),
)

for _left, _right, _left_words, _right_words, _expected in _comparisons:
    print(_separator)
    print("cosine similarity of %s vs. %s = %.4f"
          % (_left, _right, cosine(_left_words, _right_words)))
    if _expected is not None:
        print("expecting similarity of %s" % _expected)
print('')