Пример #1
0
    def test_load_single_data(self):
        """Check the X/Y shapes returned by load_single_data for 'pair.tsv'.

        X is expected to have 6 rows and 300 columns; Y is expected to
        have 6 rows.
        """
        data_dir = 'test/data/kcap-basic-vec/'
        loader = embloader.VecPairLoader(self.v)

        # The loader returns five values; only the first two are checked here.
        X, Y, _n, _tc, _tf = loader.load_single_data(data_dir + 'pair.tsv')

        self.assertEqual(6, X.shape[0])
        self.assertEqual(300, X.shape[1])
        self.assertEqual(6, Y.shape[0])
Пример #2
0
    def test_load_pair_missing_word(self):
        """Check load_pair_data on 'pair2.tsv', which has an unknown word.

        Expects 7 rows in both X and Y, 600 columns in X, a ``tc`` equal to
        twice ``n`` and a ``tf`` of 13.
        NOTE(review): ``tc``/``tf`` look like "token count"/"tokens found"
        -- confirm against the loader.
        """
        data_dir = 'test/data/kcap-basic-vec/'
        loader = embloader.VecPairLoader(self.v)

        X, Y, n, tc, tf = loader.load_pair_data(data_dir + 'pair2.tsv')

        self.assertEqual(7, X.shape[0])
        self.assertEqual(7, n)

        # all pairs are single words
        expected_tc = n * 2
        self.assertEqual(expected_tc, tc)

        # one word out of vocab
        self.assertEqual(13, tf)

        self.assertEqual(600, X.shape[1])
        self.assertEqual(7, Y.shape[0])
Пример #3
0
    def test_split_data(self):
        """Split the 'pair.tsv' data and check the size of each part.

        With train_percent=0.5, validate_percent=0.17, seed=3 and
        batch_size=1, the three returned parts are expected to have
        lengths 3, 1 and 2 respectively.
        """
        data_dir = 'test/data/kcap-basic-vec/'
        loader = embloader.VecPairLoader(self.v)

        # Only X and Y are needed; the remaining three values are discarded.
        X, Y, _n, _tc, _tf = loader.load_pair_data(data_dir + 'pair.tsv')

        split_options = {
            'train_percent': .5,
            'validate_percent': 0.17,
            'seed': 3,
            'batch_size': 1,
        }
        train, validate, test = loader.split_data(X, Y, **split_options)

        self.assertIsNotNone(train)
        self.assertEqual(3, len(train))
        self.assertIsNotNone(validate)
        self.assertEqual(1, len(validate))
        self.assertIsNotNone(test)
        self.assertEqual(2, len(test))
Пример #4
0
 def vecpath_to_loader(vecpath, dim=300):
     """Build a VecPairLoader from the vector files found under *vecpath*.

     *vecpath* is used as a plain string prefix: 'vecs.bin' and 'vocab.txt'
     are appended to it directly, so it must already end with a path
     separator when it names a directory.

     *dim* is forwarded unchanged to SwivelAsTorchTextVector
     (presumably the embedding dimensionality -- confirm).
     """
     vectors_file = vecpath + 'vecs.bin'
     vocab_file = vecpath + 'vocab.txt'
     vectors = embloader.SwivelAsTorchTextVector(vectors_file, vocab_file, dim)
     return embloader.VecPairLoader(vectors)
Пример #5
0
 def test_create_ok(self):
     """A VecPairLoader built from the ``self.v`` fixture must be truthy."""
     loader = embloader.VecPairLoader(self.v)
     self.assertTrue(loader)
Пример #6
0
 def test_create_missing_vectors(self):
     """Constructing a VecPairLoader with None must raise AssertionError."""
     # Callable form of assertRaises: invokes VecPairLoader(None) itself.
     self.assertRaises(AssertionError, embloader.VecPairLoader, None)