def test_load_single_data(self):
    """Check the shapes returned by ``load_single_data`` for ``pair.tsv``.

    Expects ``X`` to have 6 rows and 300 columns, and ``Y`` to have 6 rows.
    The remaining three returned values are unpacked but not checked here.
    """
    data_dir = 'test/data/kcap-basic-vec/'
    loader = embloader.VecPairLoader(self.v)

    # Keep the 5-way unpack so a change in the return arity still fails.
    X, Y, _n, _tc, _tf = loader.load_single_data(data_dir + 'pair.tsv')

    x_shape = X.shape
    self.assertEqual(6, x_shape[0])
    self.assertEqual(300, x_shape[1])
    self.assertEqual(6, Y.shape[0])
def test_load_pair_missing_word(self):
    """Check ``load_pair_data`` on ``pair2.tsv``, which has a missing word.

    Expects 7 rows in both ``X`` and ``Y``, 600 columns in ``X``, a pair
    count ``n`` of 7, a ``tc`` count of ``2 * n`` and a ``tf`` count of 13.
    """
    data_dir = 'test/data/kcap-basic-vec/'
    loader = embloader.VecPairLoader(self.v)
    X, Y, n, tc, tf = loader.load_pair_data(data_dir + 'pair2.tsv')

    x_shape = X.shape
    self.assertEqual(7, x_shape[0])
    self.assertEqual(7, n)
    # All pairs are single words.
    self.assertEqual(n * 2, tc)
    # One word is out of vocabulary.
    self.assertEqual(13, tf)
    self.assertEqual(600, x_shape[1])
    self.assertEqual(7, Y.shape[0])
def test_split_data(self):
    """Check the sizes of the three splits produced by ``split_data``.

    Loads ``pair.tsv`` with ``load_pair_data`` and splits it with
    ``train_percent=0.5``, ``validate_percent=0.17``, ``seed=3`` and
    ``batch_size=1``. Each split must be non-None, with lengths of 3
    (train), 1 (validate) and 2 (test).
    """
    data_dir = 'test/data/kcap-basic-vec/'
    loader = embloader.VecPairLoader(self.v)
    X, Y, _, _, _ = loader.load_pair_data(data_dir + 'pair.tsv')

    splits = loader.split_data(
        X,
        Y,
        train_percent=.5,
        validate_percent=0.17,
        seed=3,
        batch_size=1,
    )
    train, validate, test = splits

    # Same checks, same order: train, then validate, then test.
    for expected_len, split in ((3, train), (1, validate), (2, test)):
        self.assertIsNotNone(split)
        self.assertEqual(expected_len, len(split))
def vecpath_to_loader(vecpath, dim=300):
    """Build a ``VecPairLoader`` from the vector files found at ``vecpath``.

    ``vecpath`` is used as a plain string prefix: ``'vecs.bin'`` and
    ``'vocab.txt'`` are appended to it directly, so a directory path must
    already end with its separator.

    Args:
        vecpath: Prefix to which the two file names are appended.
        dim: Value passed through as the third argument of
            ``SwivelAsTorchTextVector`` (presumably the embedding
            dimension; confirm against ``embloader``).

    Returns:
        An ``embloader.VecPairLoader`` wrapping the loaded vectors.
    """
    vectors_file = vecpath + 'vecs.bin'
    vocab_file = vecpath + 'vocab.txt'
    vectors = embloader.SwivelAsTorchTextVector(vectors_file, vocab_file, dim)
    return embloader.VecPairLoader(vectors)
def test_create_ok(self):
    """Check that a ``VecPairLoader`` built from ``self.v`` is truthy."""
    self.assertTrue(embloader.VecPairLoader(self.v))
def test_create_missing_vectors(self):
    """Check that building a ``VecPairLoader`` from ``None`` raises ``AssertionError``."""
    # Callable form of assertRaises; equivalent to the context-manager form.
    self.assertRaises(AssertionError, embloader.VecPairLoader, None)