Exemplo n.º 1
0
    def test_w2v(self):
        """Check ``sm.w2v`` on two POS-tagged sentences and its symmetry.

        The pretrained word2vec model is used when it can be loaded from
        ``potara/data``; otherwise a small stand-in model is built so the
        test still runs. Each model has its own expected score.
        """
        testdir = os.path.dirname(os.path.realpath(__file__))
        modelfile = os.path.join(
            testdir, '..', 'potara', 'data', 'enwiki9stempos.model')
        try:
            model = gensim.models.word2vec.Word2Vec.load(modelfile)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit still propagate.
            # mock a similarity model
            class FakeModel(object):
                """Stand-in exposing only ``vocab`` and ``similarity``."""

                def __init__(self, vocab, sim):
                    # Instance attributes: no mutable state shared on the
                    # class object.
                    self.vocab = vocab
                    self.sim = sim

                def similarity(self, w1, w2):
                    # The table stores each pair once; try both orderings.
                    key = w1 + '_' + w2
                    if key in self.sim:
                        return self.sim[key]
                    return self.sim[w2 + '_' + w1]

            model = FakeModel(vocab=['right/JJ', 'wrong/JJ'],
                              sim={'right/JJ_wrong/JJ': 0.5})
            # expected score with the stand-in model
            esim = 0.9166
        else:
            # expected score with the pretrained model
            esim = 0.88749

        s1 = "This/T beautiful/JJ sentence/NN is/V not/N right/JJ ./PUNCT"
        s2 = "This/T beautiful/JJ sentence/NN is/V wrong/JJ ./PUNCT"

        psim = sm.w2v(s1, s2, model)
        self.assertAlmostEqual(esim, psim, places=3)

        # order doesn't matter
        psim2 = sm.w2v(s2, s1, model)
        self.assertEqual(psim, psim2)
Exemplo n.º 2
0
    def test_w2v(self):
        """Check ``sm.w2v`` on two POS-tagged sentences and its symmetry.

        The pretrained word2vec model is used when it can be loaded from
        ``potara/data``; otherwise a small stand-in model is built so the
        test still runs. Each model has its own expected score.
        """
        testdir = os.path.dirname(os.path.realpath(__file__))
        modelfile = os.path.join(
            testdir, '..', 'potara', 'data', 'enwiki9stempos.model')
        try:
            model = gensim.models.word2vec.Word2Vec.load(modelfile)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit still propagate.
            # mock a similarity model
            class FakeModel(object):
                """Stand-in exposing only ``vocab`` and ``similarity``."""

                def __init__(self, vocab, sim):
                    # Instance attributes: no mutable state shared on the
                    # class object.
                    self.vocab = vocab
                    self.sim = sim

                def similarity(self, w1, w2):
                    # The table stores each pair once; try both orderings.
                    key = w1 + '_' + w2
                    if key in self.sim:
                        return self.sim[key]
                    return self.sim[w2 + '_' + w1]

            model = FakeModel(vocab=['right/JJ', 'wrong/JJ'],
                              sim={'right/JJ_wrong/JJ': 0.5})
            # expected score with the stand-in model
            esim = 0.9166
        else:
            # expected score with the pretrained model
            esim = 0.88749

        s1 = "This/T beautiful/JJ sentence/NN is/V not/N right/JJ ./PUNCT"
        s2 = "This/T beautiful/JJ sentence/NN is/V wrong/JJ ./PUNCT"

        psim = sm.w2v(s1, s2, model)
        self.assertAlmostEqual(esim, psim, places=3)

        # order doesn't matter
        psim2 = sm.w2v(s2, s1, model)
        self.assertEqual(psim, psim2)
Exemplo n.º 3
0
    def test_w2v_singleword(self):
        """Check ``sm.w2v`` on single-word inputs.

        Two different words are expected to score 0 and two identical
        words 1. The test is skipped when the pretrained word2vec model
        cannot be loaded.
        """
        testdir = os.path.dirname(os.path.realpath(__file__))
        modelfile = os.path.join(
            testdir, '..', 'potara', 'data', 'enwiki9stempos.model')
        try:
            model = gensim.models.word2vec.Word2Vec.load(modelfile)
        except Exception:
            # Report a skip instead of returning: a plain return would be
            # counted as a pass although nothing was checked.
            # NOTE(review): assumes the enclosing class is a
            # unittest.TestCase (it already uses self.assertEqual).
            self.skipTest('word2vec model not available: ' + modelfile)

        s1 = "right/JJ"
        s2 = "wrong/JJ"

        # a single different word means 0 sim
        esim = 0
        psim = sm.w2v(s1, s2, model)
        self.assertEqual(esim, psim)

        # the same single word on both sides means full similarity
        s3 = "right/JJ"
        esim2 = 1
        psim2 = sm.w2v(s1, s3, model)
        self.assertEqual(esim2, psim2)
Exemplo n.º 4
0
    def test_w2v_singleword(self):
        """Check ``sm.w2v`` on single-word inputs.

        Two different words are expected to score 0 and two identical
        words 1. The test is skipped when the pretrained word2vec model
        cannot be loaded.
        """
        testdir = os.path.dirname(os.path.realpath(__file__))
        modelfile = os.path.join(
            testdir, '..', 'potara', 'data', 'enwiki9stempos.model')
        try:
            model = gensim.models.word2vec.Word2Vec.load(modelfile)
        except Exception:
            # Report a skip instead of returning: a plain return would be
            # counted as a pass although nothing was checked.
            # NOTE(review): assumes the enclosing class is a
            # unittest.TestCase (it already uses self.assertEqual).
            self.skipTest('word2vec model not available: ' + modelfile)

        s1 = "right/JJ"
        s2 = "wrong/JJ"

        # a single different word means 0 sim
        esim = 0
        psim = sm.w2v(s1, s2, model)
        self.assertEqual(esim, psim)

        # the same single word on both sides means full similarity
        s3 = "right/JJ"
        esim2 = 1
        psim2 = sm.w2v(s1, s3, model)
        self.assertEqual(esim2, psim2)
Exemplo n.º 5
0
    def test_w2v_notinvocab(self):
        """Check ``sm.w2v`` when a sentence contains an unusual token.

        ``s1`` contains the token ``beauful/JJ``; judging by the test name
        it is presumably absent from the model vocabulary (verify against
        the model). The test is skipped when the pretrained word2vec model
        cannot be loaded.
        """
        testdir = os.path.dirname(os.path.realpath(__file__))
        modelfile = os.path.join(
            testdir, '..', 'potara', 'data', 'enwiki9stempos.model')
        try:
            model = gensim.models.word2vec.Word2Vec.load(modelfile)
        except Exception:
            # Report a skip instead of returning: a plain return would be
            # counted as a pass although nothing was checked.
            # NOTE(review): assumes the enclosing class is a
            # unittest.TestCase (it already uses self.assertAlmostEqual).
            self.skipTest('word2vec model not available: ' + modelfile)

        s1 = "This/T beauful/JJ sentence/NN is/V not/N right/JJ ./PUNCT"
        s2 = "This/T beautiful/JJ sentence/NN is/V wrong/JJ ./PUNCT"

        # only one decimal place is compared here
        esim = 0.8
        psim = sm.w2v(s1, s2, model)
        self.assertAlmostEqual(esim, psim, places=1)
Exemplo n.º 6
0
    def test_w2v_notinvocab(self):
        """Check ``sm.w2v`` when a sentence contains an unusual token.

        ``s1`` contains the token ``beauful/JJ``; judging by the test name
        it is presumably absent from the model vocabulary (verify against
        the model). The test is skipped when the pretrained word2vec model
        cannot be loaded.
        """
        testdir = os.path.dirname(os.path.realpath(__file__))
        modelfile = os.path.join(
            testdir, '..', 'potara', 'data', 'enwiki9stempos.model')
        try:
            model = gensim.models.word2vec.Word2Vec.load(modelfile)
        except Exception:
            # Report a skip instead of returning: a plain return would be
            # counted as a pass although nothing was checked.
            # NOTE(review): assumes the enclosing class is a
            # unittest.TestCase (it already uses self.assertAlmostEqual).
            self.skipTest('word2vec model not available: ' + modelfile)

        s1 = "This/T beauful/JJ sentence/NN is/V not/N right/JJ ./PUNCT"
        s2 = "This/T beautiful/JJ sentence/NN is/V wrong/JJ ./PUNCT"

        # compared to three decimal places
        esim = 0.7208
        psim = sm.w2v(s1, s2, model)
        self.assertAlmostEqual(esim, psim, places=3)
Exemplo n.º 7
0
    def test_w2v_untag(self):
        """Check ``sm.w2v`` on sentences that carry no POS tags.

        The expected score is 4/5 for the two sentences below. The test is
        skipped when the pretrained word2vec model cannot be loaded.
        """
        testdir = os.path.dirname(os.path.realpath(__file__))
        modelfile = os.path.join(
            testdir, '..', 'potara', 'data', 'enwiki9stempos.model')
        try:
            model = gensim.models.word2vec.Word2Vec.load(modelfile)
        except Exception:
            # Report a skip instead of returning: a plain return would be
            # counted as a pass although nothing was checked.
            # NOTE(review): assumes the enclosing class is a
            # unittest.TestCase (it already uses self.assertEqual).
            self.skipTest('word2vec model not available: ' + modelfile)

        s1 = "This sentence is not right ."
        s2 = "This sentence is wrong ."

        # without tags we consider the intersection over min length
        esim = 4.0/5
        psim = sm.w2v(s1, s2, model)
        self.assertEqual(esim, psim)
Exemplo n.º 8
0
    def test_w2v_untag(self):
        """Check ``sm.w2v`` on sentences that carry no POS tags.

        The expected score is 4/5 for the two sentences below. The test is
        skipped when the pretrained word2vec model cannot be loaded.
        """
        testdir = os.path.dirname(os.path.realpath(__file__))
        modelfile = os.path.join(
            testdir, '..', 'potara', 'data', 'enwiki9stempos.model')
        try:
            model = gensim.models.word2vec.Word2Vec.load(modelfile)
        except Exception:
            # Report a skip instead of returning: a plain return would be
            # counted as a pass although nothing was checked.
            # NOTE(review): assumes the enclosing class is a
            # unittest.TestCase (it already uses self.assertEqual).
            self.skipTest('word2vec model not available: ' + modelfile)

        s1 = "This sentence is not right ."
        s2 = "This sentence is wrong ."

        # without tags we consider the intersection over min length
        esim = 4.0/5
        psim = sm.w2v(s1, s2, model)
        self.assertEqual(esim, psim)