Пример #1
0
    def test_space_compose_dense(self):
        """Compose with explicitly supplied A/B matrices on the dense fixtures.

        Each scenario bundles the phrase tuples to compose, the argument
        space, the space holding the expected result, and the two parameter
        matrices handed to ``FullAdditive``.
        """
        scenarios = (
            ([("a", "b", "a_b")],
             self.space4, self.space5,
             DenseMatrix.identity(2), DenseMatrix.identity(2)),
            ([("a", "b", "a_b")],
             self.space4, self.space6,
             np.mat([[0, 0], [0, 0]]), np.mat([[0, 0], [0, 0]])),
            ([("a", "b", "a_b"), ("a", "b", "a_a")],
             self.space4, self.space7,
             DenseMatrix.identity(2), DenseMatrix.identity(2)),
        )

        for phrases, args, expected, a_mat, b_mat in scenarios:
            model = FullAdditive(A=a_mat, B=b_mat)
            composed = model.compose(phrases, args)

            # Values must agree with the expected space to 10 decimals.
            actual_values = composed.cooccurrence_matrix.mat
            wanted_values = expected.cooccurrence_matrix.mat
            np.testing.assert_array_almost_equal(actual_values,
                                                 wanted_values, 10)

            # A model built from raw matrices yields no column labels.
            self.assertListEqual(composed.id2column, [])
            self.assertDictEqual(composed.column2id, {})

            # Row labels must match the expected phrase space.
            self.assertListEqual(composed.id2row, expected.id2row)
            self.assertDictEqual(composed.row2id, expected.row2id)

            self.assertFalse(model._has_intercept)
Пример #2
0
 def test_space_train_dense(self):
     """Train on the dense fixtures with several learners, then re-compose.

     For every (scenario, learner) combination the model is trained and the
     same phrase tuples are composed again; the composed space has to
     reproduce the phrase space's values, column labels and row labels.
     """
     scenarios = [
         ([("a", "b", "a_b")], self.space4, self.space5),
         ([("a", "b", "a_b")], self.space4, self.space6),
         ([("a", "b", "a_b"), ("a", "b", "a_a")], self.space4, self.space7),
     ]

     # The same learner instances are reused across all scenarios.
     ridge = RidgeRegressionLearner(intercept=False,
                                    crossvalidation=False,
                                    param=0)
     lstsq_plain = LstsqRegressionLearner(intercept=False)
     lstsq_intercept = LstsqRegressionLearner(intercept=True)
     candidate_learners = [ridge, lstsq_plain, lstsq_intercept]

     for phrases, args, target in scenarios:
         for current in candidate_learners:
             model = FullAdditive(learner=current)
             model.train(phrases, args, target)

             composed = model.compose(phrases, args)

             np.testing.assert_array_almost_equal(
                 composed.cooccurrence_matrix.mat,
                 target.cooccurrence_matrix.mat,
                 10)

             # Column labels are compared against the phrase space.
             self.assertListEqual(composed.id2column, target.id2column)
             self.assertDictEqual(composed.column2id, target.column2id)

             # Row labels are compared against the phrase space as well.
             self.assertListEqual(composed.id2row, target.id2row)
             self.assertDictEqual(composed.row2id, target.row2id)

             # The model's intercept flag mirrors the learner's setting.
             self.assertEqual(model._has_intercept, current._intercept)
Пример #3
0
 def test_train1(self):
     """Call ``_train`` on small fixtures and check the learned A and B.

     Uses a least-squares learner without intercept; the transposes of the
     model's ``_mat_a_t`` / ``_mat_b_t`` are compared with the expected
     1x1 matrices to 10 decimals.
     """
     zero_phrase = DenseMatrix(np.mat([[0], [0]]))
     scenarios = [
         (self.m11, self.m21, self.ph1, np.mat([[2]]), np.mat([[3]])),
         (self.m11, self.m21, zero_phrase, np.mat([[0]]), np.mat([[0]])),
     ]

     for arg1_mat, arg2_mat, phrase_mat, want_a, want_b in scenarios:
         learner = LstsqRegressionLearner(intercept=False)
         model = FullAdditive(learner=learner)
         model._train(arg1_mat, arg2_mat, phrase_mat)

         got_a = model._mat_a_t.transpose().mat
         np.testing.assert_array_almost_equal(got_a, want_a, 10)

         got_b = model._mat_b_t.transpose().mat
         np.testing.assert_array_almost_equal(got_b, want_b, 10)
Пример #4
0
 def test_space_compose_sparse(self):
     """Compose with explicitly supplied A/B matrices and compare values.

     Both the composed and the expected matrices are converted with
     ``todense()`` before comparison.
     """
     # NOTE(review): open question carried over from the original author --
     # the parameters are given as dense matrices while the input data
     # appears to be sparse; it is unclear what the intended behaviour is.
     scenarios = [
         ([("a", "b", "a_b")], self.space1, self.space2,
          DenseMatrix.identity(2), DenseMatrix.identity(2)),
         ([("a", "b", "a_b")], self.space1, self.space3,
          np.mat([[0, 0], [0, 0]]), np.mat([[0, 0], [0, 0]])),
     ]

     for phrases, args, expected, a_mat, b_mat in scenarios:
         model = FullAdditive(A=a_mat, B=b_mat)
         composed = model.compose(phrases, args)

         actual_dense = composed.cooccurrence_matrix.mat.todense()
         wanted_dense = expected.cooccurrence_matrix.mat.todense()
         np.testing.assert_array_almost_equal(actual_dense, wanted_dense, 10)
Пример #5
0
    def test_train1(self):
        """Check the A and B matrices learned by ``_train`` on tiny inputs.

        A least-squares learner without intercept is used for every case;
        the transposed ``_mat_a_t`` and ``_mat_b_t`` must equal the expected
        matrices to 10 decimals.
        """
        all_zero_phrase = DenseMatrix(np.mat([[0], [0]]))
        cases = (
            (self.m11, self.m21, self.ph1,
             np.mat([[2]]), np.mat([[3]])),
            (self.m11, self.m21, all_zero_phrase,
             np.mat([[0]]), np.mat([[0]])),
        )

        for case in cases:
            first_arg, second_arg, phrase, a_wanted, b_wanted = case

            no_intercept = LstsqRegressionLearner(intercept=False)
            model = FullAdditive(learner=no_intercept)
            model._train(first_arg, second_arg, phrase)

            a_learned = model._mat_a_t.transpose().mat
            np.testing.assert_array_almost_equal(a_learned, a_wanted, 10)

            b_learned = model._mat_b_t.transpose().mat
            np.testing.assert_array_almost_equal(b_learned, b_wanted, 10)
Пример #6
0
 def test_full_additive(self):
     """Export must fail before training and be callable afterwards.

     Builds small dense fixtures on ``self``, verifies that exporting an
     untrained ``FullAdditive`` raises ``IllegalStateError``, trains the
     model and exports it again. The exported output is not inspected.
     """
     self.m12 = DenseMatrix(np.mat([[3, 1], [9, 2]]))
     self.m22 = DenseMatrix(np.mat([[4, 3], [2, 1]]))
     self.ph2 = DenseMatrix(np.mat([[18, 11], [24, 7]]))

     row_labels = ["a", "b"]
     feature_labels = ["f1", "f2"]
     self.row = row_labels
     self.ft = feature_labels

     self.space1 = Space(DenseMatrix(self.m12), row_labels, feature_labels)
     self.space2 = Space(DenseMatrix(self.ph2), ["a_a", "a_b"],
                         feature_labels)

     model = FullAdditive()

     # Exporting before any training has happened is an error.
     untrained_target = self.prefix + ".full1"
     self.assertRaises(IllegalStateError, model.export, untrained_target)

     training_phrases = [("a", "b", "a_b"), ("a", "a", "a_a")]
     model.train(training_phrases, self.space1, self.space2)

     model.export(self.prefix + ".full2")
Пример #7
0
    def test_space_compose_sparse(self):
        """Compose with fixed A/B matrices and compare densified results.

        The composed matrix and the expected matrix are each turned into a
        dense matrix via ``todense()`` and compared to 10 decimals.
        """
        # NOTE(review): unresolved question from the original author -- the
        # parameters are given as dense matrices while the input data
        # appears to be sparse; the intended handling is not settled.
        cases = (
            ([("a", "b", "a_b")], self.space1, self.space2,
             DenseMatrix.identity(2), DenseMatrix.identity(2)),
            ([("a", "b", "a_b")], self.space1, self.space3,
             np.mat([[0, 0], [0, 0]]), np.mat([[0, 0], [0, 0]])),
        )

        for case in cases:
            phrases, args, wanted_space, param_a, param_b = case

            model = FullAdditive(A=param_a, B=param_b)
            result_space = model.compose(phrases, args)

            got = result_space.cooccurrence_matrix.mat.todense()
            wanted = wanted_space.cooccurrence_matrix.mat.todense()
            np.testing.assert_array_almost_equal(got, wanted, 10)
Пример #8
0
    def test_space_compose_dense(self):
        """Check composition on dense fixtures when A and B are given directly.

        For each case the composed space must match the expected space in
        values and row labels, expose no column labels, and the model must
        report that it has no intercept.
        """
        cases = [
            ([("a", "b", "a_b")],
             self.space4,
             self.space5,
             DenseMatrix.identity(2),
             DenseMatrix.identity(2)),
            ([("a", "b", "a_b")],
             self.space4,
             self.space6,
             np.mat([[0, 0], [0, 0]]),
             np.mat([[0, 0], [0, 0]])),
            ([("a", "b", "a_b"), ("a", "b", "a_a")],
             self.space4,
             self.space7,
             DenseMatrix.identity(2),
             DenseMatrix.identity(2)),
        ]

        for case in cases:
            phrases, args, wanted_space, param_a, param_b = case

            model = FullAdditive(A=param_a, B=param_b)
            result_space = model.compose(phrases, args)

            np.testing.assert_array_almost_equal(
                result_space.cooccurrence_matrix.mat,
                wanted_space.cooccurrence_matrix.mat,
                10)

            # No column information is expected on the composed space.
            self.assertListEqual(result_space.id2column, [])
            self.assertDictEqual(result_space.column2id, {})

            # Row bookkeeping has to agree with the expected space.
            self.assertListEqual(result_space.id2row, wanted_space.id2row)
            self.assertDictEqual(result_space.row2id, wanted_space.row2id)

            self.assertFalse(model._has_intercept)
Пример #9
0
 def test_train2(self):
     """Check that ``_train`` recovers randomly generated A and B matrices.

     Random argument matrices ``m1`` and ``m2`` are combined with random
     ``expected_a`` / ``expected_b`` into a phrase matrix; after training a
     least-squares model the learned parameters must equal the generating
     ones to 10 decimals. The case is run without an intercept for two
     sample counts and with an intercept for two larger sample counts.
     """
     # A locally seeded generator keeps the fixtures reproducible (a failing
     # run can be repeated with identical data) without touching NumPy's
     # global random state.
     rng = np.random.RandomState(0)

     out_dim = 2
     arg1_dim = 3
     arg2_dim = 5
     base = arg1_dim + arg2_dim

     # (use intercept?, numbers of training rows to try)
     scenarios = [
         (False, [base, base + 2]),
         (True, [base + 6, base + 20]),
     ]

     for with_intercept, row_counts in scenarios:
         for n_rows in row_counts:
             expected_a = np.mat(rng.random_sample((out_dim, arg1_dim)))
             expected_b = np.mat(rng.random_sample((out_dim, arg2_dim)))
             m1 = np.mat(rng.random_sample((n_rows, arg1_dim)))
             m2 = np.mat(rng.random_sample((n_rows, arg2_dim)))

             # Phrase matrix generated exactly by the additive model.
             ph = np.mat(expected_a * m1.T + expected_b * m2.T)

             learner = LstsqRegressionLearner(intercept=with_intercept)
             comp_model = FullAdditive(learner=learner)
             comp_model._train(DenseMatrix(m1), DenseMatrix(m2),
                               DenseMatrix(ph).transpose())

             learned_b_t = comp_model._mat_b_t
             if with_intercept:
                 # The last row is excluded from the comparison in the
                 # intercept case (presumably the intercept term -- verify
                 # against FullAdditive._train).
                 learned_b_t = learned_b_t[:-1, :]

             np.testing.assert_array_almost_equal(
                 comp_model._mat_a_t.transpose().mat, expected_a, 10)
             np.testing.assert_array_almost_equal(
                 learned_b_t.transpose().mat, expected_b, 10)
Пример #10
0
    def test_space_train_dense(self):
        """Train and re-compose on the dense fixtures for several learners.

        After training on a case, composing the same phrase tuples must give
        back the phrase space: equal values (10 decimals), equal column
        labels and equal row labels. The model's ``_has_intercept`` must
        equal the learner's ``_intercept``.
        """
        cases = (
            ([("a", "b", "a_b")], self.space4, self.space5),
            ([("a", "b", "a_b")], self.space4, self.space6),
            ([("a", "b", "a_b"), ("a", "b", "a_a")],
             self.space4, self.space7),
        )

        # Learner objects are created once and shared by all cases.
        regressors = (
            RidgeRegressionLearner(intercept=False, crossvalidation=False,
                                   param=0),
            LstsqRegressionLearner(intercept=False),
            LstsqRegressionLearner(intercept=True),
        )

        for phrases, args, wanted_space in cases:
            for regressor in regressors:
                model = FullAdditive(learner=regressor)
                model.train(phrases, args, wanted_space)
                result_space = model.compose(phrases, args)

                got = result_space.cooccurrence_matrix.mat
                wanted = wanted_space.cooccurrence_matrix.mat
                np.testing.assert_array_almost_equal(got, wanted, 10)

                self.assertListEqual(result_space.id2column,
                                     wanted_space.id2column)
                self.assertDictEqual(result_space.column2id,
                                     wanted_space.column2id)

                self.assertListEqual(result_space.id2row,
                                     wanted_space.id2row)
                self.assertDictEqual(result_space.row2id,
                                     wanted_space.row2id)

                self.assertEqual(model._has_intercept, regressor._intercept)
Пример #11
0
    def test_full_additive(self):
        """Verify ``export`` raises when untrained and runs once trained.

        The fixtures are stored on ``self``. An untrained ``FullAdditive``
        must raise ``IllegalStateError`` on export; after ``train`` the
        export call is made again. Nothing is asserted about what gets
        written.
        """
        self.m12 = DenseMatrix(np.mat([[3, 1], [9, 2]]))
        self.m22 = DenseMatrix(np.mat([[4, 3], [2, 1]]))
        self.ph2 = DenseMatrix(np.mat([[18, 11], [24, 7]]))
        self.row = ["a", "b"]
        self.ft = ["f1", "f2"]

        argument_matrix = DenseMatrix(self.m12)
        self.space1 = Space(argument_matrix, self.row, self.ft)

        phrase_matrix = DenseMatrix(self.ph2)
        self.space2 = Space(phrase_matrix, ["a_a", "a_b"], self.ft)

        additive = FullAdditive()

        # No parameters have been learned yet, so export has to fail.
        path_before_training = self.prefix + ".full1"
        self.assertRaises(IllegalStateError, additive.export,
                          path_before_training)

        phrase_tuples = [("a", "b", "a_b"), ("a", "a", "a_a")]
        additive.train(phrase_tuples, self.space1, self.space2)

        path_after_training = self.prefix + ".full2"
        additive.export(path_after_training)
Пример #12
0
    def test_train2(self):
        """Check that ``_train`` recovers randomly generated A and B matrices.

        Random argument matrices ``m1`` and ``m2`` are combined with random
        ``expected_a`` / ``expected_b`` into a phrase matrix; after training
        a least-squares model the learned parameters must equal the
        generating ones to 10 decimals. The case is run without an intercept
        for two sample counts and with an intercept for two larger sample
        counts.
        """
        # A locally seeded generator keeps the fixtures reproducible (a
        # failing run can be repeated with identical data) without touching
        # NumPy's global random state.
        rng = np.random.RandomState(0)

        out_dim = 2
        arg1_dim = 3
        arg2_dim = 5
        base = arg1_dim + arg2_dim

        # (use intercept?, numbers of training rows to try)
        scenarios = [
            (False, [base, base + 2]),
            (True, [base + 6, base + 20]),
        ]

        for with_intercept, row_counts in scenarios:
            for n_rows in row_counts:
                expected_a = np.mat(rng.random_sample((out_dim, arg1_dim)))
                expected_b = np.mat(rng.random_sample((out_dim, arg2_dim)))
                m1 = np.mat(rng.random_sample((n_rows, arg1_dim)))
                m2 = np.mat(rng.random_sample((n_rows, arg2_dim)))

                # Phrase matrix generated exactly by the additive model.
                ph = np.mat(expected_a * m1.T + expected_b * m2.T)

                learner = LstsqRegressionLearner(intercept=with_intercept)
                comp_model = FullAdditive(learner=learner)
                comp_model._train(DenseMatrix(m1), DenseMatrix(m2),
                                  DenseMatrix(ph).transpose())

                learned_b_t = comp_model._mat_b_t
                if with_intercept:
                    # The last row is excluded from the comparison in the
                    # intercept case (presumably the intercept term --
                    # verify against FullAdditive._train).
                    learned_b_t = learned_b_t[:-1, :]

                np.testing.assert_array_almost_equal(
                    comp_model._mat_a_t.transpose().mat, expected_a, 10)
                np.testing.assert_array_almost_equal(
                    learned_b_t.transpose().mat, expected_b, 10)
Пример #13
0
# NOTE: every print below is a single-argument print(...) call (with
# %-formatting where several values are shown), which prints the same text
# under Python 2 and Python 3; the former print statements were a
# SyntaxError on Python 3.

# Compose the test phrases with whichever model comp_model holds here.
composed_space = comp_model.compose(test_phrases, space)

print("Reading similarity test data...")
test_similarity_file = data_path + "ML08data_new.txt"
# Fields 0 and 1 are read as the pairs to compare, field 2 as the gold list.
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)

print("Computing similarity with lexical function...")
pred = composed_space.get_sims(test_pairs, CosSimilarity())

# Score the predicted similarities against the gold values.
print("Scoring lexical function...")
print(scoring_utils.score(gold, pred, "spearman"))

# Same train / compose / score cycle for the Full Additive model.
print("Training Full Additive composition model...")
comp_model = FullAdditive(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print(scoring_utils.score(gold, pred, "spearman"))

# Same cycle for the Weighted Additive model, also showing alpha and beta.
print("Training Weighted Additive composition model...")
comp_model = WeightedAdditive()
comp_model.train(train_data, space, per_space)
print("alpha, beta: %s %s" % (comp_model.alpha, comp_model.beta))
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print(scoring_utils.score(gold, pred, "spearman"))

print("Training Dilation composition model...")
comp_model = Dilation()
Пример #14
0
from composes.composition.full_additive import FullAdditive

# Phrase tuples the model is trained on.
train_data = [
    ("good", "car", "good_car"),
    ("good", "book", "good_book"),
]

# Load the argument space from disk.
arg_space = io_utils.load("./data/out/ex10.pkl")

# Load the phrase space from disk and show its rows and matrix.
phrase_space = io_utils.load("data/out/PHRASE_SS.ex10.pkl")
print("Training phrase space")
print(phrase_space.id2row)
print(phrase_space.cooccurrence_matrix)

# Train a FullAdditive model with its default settings.
my_comp = FullAdditive()
my_comp.train(train_data, arg_space, phrase_space)

# Show the learned parameters; the model keeps them in transposed form.
mat_a = my_comp._mat_a_t.transpose()
print("\nA:", mat_a)
mat_b = my_comp._mat_b_t.transpose()
print("B:", mat_b)

# Apply the trained model to a phrase that is not in train_data.
new_phrases = [("good", "bike", "good_bike")]
composed_space = my_comp.compose(new_phrases, arg_space)
print("\nComposed space:")
print(composed_space.id2row)
print(composed_space.cooccurrence_matrix)
Пример #15
0
from composes.composition.full_additive import FullAdditive

# NOTE: every print below is a single-argument print(...) call (with
# %-formatting where a label and a value are shown together), which prints
# the same text under Python 2 and Python 3; the former print statements
# were a SyntaxError on Python 3.

# Phrase tuples the model is trained on.
train_data = [("good", "car", "good_car"),
              ("good", "book", "good_book")
              ]

# Load the argument space from disk.
arg_space = io_utils.load("./data/out/ex10.pkl")

# Load the phrase space from disk and show its rows and matrix.
phrase_space = io_utils.load("data/out/PHRASE_SS.ex10.pkl")
print("Training phrase space")
print(phrase_space.id2row)
print(phrase_space.cooccurrence_matrix)

# Train a FullAdditive model with its default settings.
my_comp = FullAdditive()
my_comp.train(train_data, arg_space, phrase_space)

# Show the learned parameters; the model keeps them in transposed form.
print("\nA: %s" % (my_comp._mat_a_t.transpose(),))
print("B: %s" % (my_comp._mat_b_t.transpose(),))

# Apply the trained model to a phrase that is not in train_data.
composed_space = my_comp.compose([("good", "bike", "good_bike")],
                                 arg_space)
print("\nComposed space:")
print(composed_space.id2row)
print(composed_space.cooccurrence_matrix)