def test_similar_by_sentence(self):
    """Query a trained Average model with a tokenized sentence.

    The first field of the top-ranked result is expected to be 4.
    """
    corpus_doc = IndexedLineDocument(CORPUS)
    model = Average(W2V)
    model.train(corpus_doc)

    hits = model.sv.similar_by_sentence(
        sentence=["the", "product", "is", "good"],
        model=model,
    )

    self.assertEqual(4, hits[0][0])
def test_similar_by_sentence_wrong_model(self):
    """Passing ``W2V`` itself as ``model`` must raise RuntimeError.

    The call is made with ``model=W2V`` rather than the trained
    ``Average`` instance.
    """
    corpus_doc = IndexedLineDocument(CORPUS)
    model = Average(W2V)
    model.train(corpus_doc)

    with self.assertRaises(RuntimeError):
        model.sv.similar_by_sentence(
            sentence=["the", "product", "is", "good"],
            model=W2V,
        )
def test_similar_by_word(self):
    """Look up sentences similar to the word ``"the"``.

    Without ``indexable`` the value 96 is expected in field 0 of the top
    result; with ``indexable`` supplied it is expected in field 1.
    """
    corpus_doc = IndexedLineDocument(CORPUS)
    model = Average(W2V)
    model.train(corpus_doc)

    hits = model.sv.similar_by_word(word="the", wv=model.wv)
    self.assertEqual(96, hits[0][0])

    hits = model.sv.similar_by_word(
        word="the",
        wv=model.wv,
        indexable=corpus_doc,
    )
    self.assertEqual(96, hits[0][1])
def test_most_similar_wrong_indexable(self):
    """``most_similar`` must raise RuntimeError for a bad ``indexable``.

    A plain do-nothing function is passed where an indexable is expected.
    """

    def indexable(self):
        # Deliberately empty: this function only serves as the invalid
        # ``indexable`` argument below.
        pass

    corpus_doc = IndexedLineDocument(CORPUS)
    model = Average(W2V)
    model.train(corpus_doc)

    with self.assertRaises(RuntimeError):
        model.sv.most_similar(positive=0, indexable=indexable)
def test_most_similar_vecs(self):
    """Query ``most_similar`` with the vectors fetched for indices 0 and 1.

    The first two results are expected to carry 1 and then 0 in field 0.
    """
    corpus_doc = IndexedLineDocument(CORPUS)
    model = Average(W2V)
    model.train(corpus_doc)
    model.sv.init_sims()

    query_vectors = model.sv[[0, 1]]
    hits = model.sv.most_similar(positive=query_vectors)

    self.assertEqual(1, hits[0][0])
    self.assertEqual(0, hits[1][0])
def test_most_similar_vec(self):
    """Query ``most_similar`` with the normalized vector of index 0.

    Results at ranks 1 and 2 are expected to carry 45 and 35 in field 0.
    """
    corpus_doc = IndexedLineDocument(CORPUS)
    model = Average(W2V)
    model.train(corpus_doc)
    model.sv.init_sims()

    query_vector = model.sv.get_vector(0, use_norm=True)
    hits = model.sv.most_similar(positive=query_vector)

    # The result list includes 0 itself, so the checks start at rank 1.
    self.assertEqual(45, hits[1][0])
    self.assertEqual(35, hits[2][0])
def test_most_similar(self):
    """Exercise ``most_similar`` for index 0 with and without ``indexable``.

    Three variants are checked:
    * no ``indexable``: field 0 of the first two results is 45, then 35;
    * ``IndexedLineDocument`` as ``indexable``: field 0 of the top result
      is the sentence text;
    * ``IndexedList`` as ``indexable``: the first element of field 0 of
      the top result is the tokenized sentence.
    """
    indexed_list = IndexedList(SENTENCES)
    corpus_doc = IndexedLineDocument(CORPUS)
    model = Average(W2V)
    model.train(corpus_doc)

    hits = model.sv.most_similar(positive=0)
    self.assertEqual(45, hits[0][0])
    self.assertEqual(35, hits[1][0])

    hits = model.sv.most_similar(positive=0, indexable=corpus_doc)
    self.assertEqual("Looks good and fits snug", hits[0][0])

    hits = model.sv.most_similar(positive=0, indexable=indexed_list)
    self.assertEqual("Looks good and fits snug".split(), hits[0][0][0])
def test_most_similar_restrict_size_tuple(self):
    """Check ``most_similar`` with ``restrict_size`` given as ``(5, 25)``.

    With ``positive=20`` the result has 19 entries; with ``positive=1``
    it has 20. The top entry carries the expected value in field 0, or
    in field 1 once ``indexable`` is supplied.
    """
    corpus_doc = IndexedLineDocument(CORPUS)
    model = Average(W2V)
    model.train(corpus_doc)

    hits = model.sv.most_similar(
        positive=20,
        topn=20,
        restrict_size=(5, 25),
    )
    self.assertEqual(19, len(hits))
    self.assertEqual(22, hits[0][0])

    hits = model.sv.most_similar(
        positive=1,
        topn=20,
        restrict_size=(5, 25),
    )
    self.assertEqual(20, len(hits))
    self.assertEqual(9, hits[0][0])

    hits = model.sv.most_similar(
        positive=1,
        topn=20,
        restrict_size=(5, 25),
        indexable=corpus_doc,
    )
    self.assertEqual(20, len(hits))
    self.assertEqual(9, hits[0][1])
def test_similar_by_vector(self):
    """Query ``similar_by_vector`` with the word vector of ``"the"``.

    Field 0 of the top-ranked result is expected to be 96.
    """
    corpus_doc = IndexedLineDocument(CORPUS)
    model = Average(W2V)
    model.train(corpus_doc)

    word_vector = model.wv["the"]
    hits = model.sv.similar_by_vector(word_vector)

    self.assertEqual(96, hits[0][0])
def test_most_similar_restrict_size(self):
    """Check ``most_similar`` with an integer ``restrict_size``.

    With ``restrict_size=5`` and ``topn=20`` the result has 5 entries.
    """
    corpus_doc = IndexedLineDocument(CORPUS)
    model = Average(W2V)
    model.train(corpus_doc)

    hits = model.sv.most_similar(positive=20, topn=20, restrict_size=5)

    self.assertEqual(5, len(hits))
def setUp(self):
    """Store the test-sentences path and build the document fixture."""
    # Path to the sentence file used as the fixture.
    self.p = "fse/test/test_data/test_sentences.txt"
    # Document object built from that path.
    self.doc = IndexedLineDocument(self.p)
def setUp(self):
    """Create the corpus document and the SIF model fixtures."""
    # Corpus wrapped as an IndexedLineDocument.
    self.sentences = IndexedLineDocument(CORPUS)
    # SIF model built on W2V with lang_freq set to "en".
    self.model = SIF(W2V, lang_freq="en")