def test_cy_equal_np_ft_random(self):
    """Compare the NumPy and Cython averaging routines on a FastText model.

    A FastText model (size 20) gets its vocabulary built from ``SENTENCES``
    but is not trained. Two ``Average`` models wrap it: one is run through
    ``train_average_np`` and the other through ``train_average_cy``, both
    on the first two sentences only. The return values must be equal and
    the sentence vectors must agree within ``atol=1e-6``.
    """
    from fse.models.average_inner import MAX_NGRAMS_IN_BATCH, train_average_cy

    base_ft = FastText(size=20, min_count=1)
    base_ft.build_vocab(SENTENCES)
    n_total = len(self.sentences)

    # --- NumPy reference path ---
    np_model = Average(base_ft)
    np_model.prep.prepare_vectors(
        sv=np_model.sv, total_sentences=n_total, update=False
    )
    np_model._pre_train_calls()
    # NOTE(review): only the NumPy-side model has batch_ngrams overridden;
    # presumably this aligns it with the limit the Cython routine uses.
    # Set before the working memory is requested, as in the original.
    np_model.batch_ngrams = MAX_NGRAMS_IN_BATCH
    np_mem = np_model._get_thread_working_mem()
    np_out = train_average_np(
        np_model, self.sentences[:2], np_model.sv.vectors, np_mem
    )

    # --- Cython path ---
    cy_model = Average(base_ft)
    cy_model.prep.prepare_vectors(
        sv=cy_model.sv, total_sentences=n_total, update=False
    )
    cy_model._pre_train_calls()
    cy_mem = cy_model._get_thread_working_mem()
    cy_out = train_average_cy(
        cy_model, self.sentences[:2], cy_model.sv.vectors, cy_mem
    )

    self.assertEqual(np_out, cy_out)
    vectors_agree = np.allclose(
        np_model.sv.vectors, cy_model.sv.vectors, atol=1e-6
    )
    self.assertTrue(vectors_agree)
def test_cy_equal_np_w2v_random(self):
    """Compare the NumPy and Cython averaging routines on a Word2Vec model.

    A Word2Vec model gets its vocabulary built from ``SENTENCES`` without
    any training, so its vectors keep their random initialization. Two
    ``Average`` models wrap it: one is run through ``train_average_np``
    and the other through ``train_average_cy`` on all of
    ``self.sentences``. The return values must be equal and the sentence
    vectors must agree within ``atol=1e-6``.
    """
    w2v = Word2Vec(min_count=1, size=DIM)
    # Random initialization
    w2v.build_vocab(SENTENCES)

    m1 = Average(w2v)
    m1.prep.prepare_vectors(
        sv=m1.sv, total_sentences=len(self.sentences), update=False
    )
    m1._pre_train_calls()
    mem1 = m1._get_thread_working_mem()
    o1 = train_average_np(m1, self.sentences, m1.sv.vectors, mem1)

    m2 = Average(w2v)
    m2.prep.prepare_vectors(
        sv=m2.sv, total_sentences=len(self.sentences), update=False
    )
    m2._pre_train_calls()
    mem2 = m2._get_thread_working_mem()

    from fse.models.average_inner import train_average_cy

    o2 = train_average_cy(m2, self.sentences, m2.sv.vectors, mem2)

    # The return values were previously computed but never checked; the
    # sibling cy-vs-np tests compare them, so do the same here.
    self.assertEqual(o1, o2)
    self.assertTrue(np.allclose(m1.sv.vectors, m2.sv.vectors, atol=1e-6))
def test_average_train_np_w2v(self):
    """Run ``train_average_np`` on the fixture model and check the result.

    The sentence vectors are reset to float32 zeros, the NumPy routine is
    run over ``self.sentences``, and the test checks the returned tuple
    plus three rows of the sentence-vector store.
    """
    model = self.model
    model.sv.vectors = np.zeros_like(model.sv.vectors, dtype=np.float32)
    work_mem = model._get_thread_working_mem()

    result = train_average_np(model, self.sentences, model.sv.vectors, work_mem)

    # NOTE(review): presumably (effective sentences, effective words);
    # confirm against train_average_np.
    self.assertEqual((4, 7), result)

    # Every component of a row must equal a single scalar.
    # NOTE(review): this suggests the fixture's word vectors are constant
    # per word; verify in setUp.
    sent_vecs = model.sv
    self.assertTrue((183 == sent_vecs[0]).all())
    self.assertTrue((164.5 == sent_vecs[1]).all())
    go_index = model.wv.vocab["go"].index
    self.assertTrue((go_index == sent_vecs[2]).all())
def test_average_train_np_ft(self):
    """Run ``train_average_np`` on a FastText model with constant vectors.

    The word and vocab vectors are replaced by all-ones and the ngram
    vectors by all-twos, so each sentence vector checked below must be
    constant across its components.
    """
    fasttext = FastText(min_count=1, size=DIM)
    fasttext.build_vocab(SENTENCES)

    model = Average(fasttext)
    model.prep.prepare_vectors(
        sv=model.sv, total_sentences=len(self.sentences), update=False
    )
    model._pre_train_calls()

    # ``vectors`` and ``vectors_vocab`` intentionally share one array.
    ones = np.ones_like(model.wv.vectors, dtype=np.float32)
    model.wv.vectors = ones
    model.wv.vectors_vocab = ones
    model.wv.vectors_ngrams = np.full_like(
        model.wv.vectors_ngrams, 2, dtype=np.float32
    )

    work_mem = model._get_thread_working_mem()
    result = train_average_np(model, self.sentences, model.sv.vectors, work_mem)

    # NOTE(review): presumably (effective sentences, effective words);
    # confirm against train_average_np.
    self.assertEqual((4, 10), result)

    # NOTE(review): the values between 1 and 2 presumably reflect the mix
    # of in-vocabulary words and ngram-only words per sentence; confirm.
    for row, expected in ((0, 1.0), (2, 1.5), (3, 2)):
        self.assertTrue((expected == model.sv[row]).all())
def test_cy_equal_np_w2v(self):
    """Check the NumPy and Cython averaging routines match exactly on ``W2V``.

    Two ``Average`` models are built on the module-level ``W2V`` model and
    run over ``self.sentences``, one per routine. The return values must
    be equal and the sentence vectors must be element-wise identical.
    """
    n_total = len(self.sentences)

    def fresh_average():
        # Build an Average model on W2V, allocate its sentence vectors and
        # run the pre-train hooks; return it together with its working memory.
        model = Average(W2V)
        model.prep.prepare_vectors(
            sv=model.sv, total_sentences=n_total, update=False
        )
        model._pre_train_calls()
        return model, model._get_thread_working_mem()

    np_model, np_mem = fresh_average()
    np_out = train_average_np(
        np_model, self.sentences, np_model.sv.vectors, np_mem
    )

    cy_model, cy_mem = fresh_average()
    cy_out = train_average_cy(
        cy_model, self.sentences, cy_model.sv.vectors, cy_mem
    )

    self.assertEqual(np_out, cy_out)
    identical = (np_model.sv.vectors == cy_model.sv.vectors).all()
    self.assertTrue(identical)