def test_space_compose_dense(self):
    test_cases = [
        ([("a", "b", "a_b")], self.space4, self.space5,
         DenseMatrix.identity(2), DenseMatrix.identity(2)),
        ([("a", "b", "a_b")], self.space4, self.space6,
         np.mat([[0, 0], [0, 0]]), np.mat([[0, 0], [0, 0]])),
        ([("a", "b", "a_b"), ("a", "b", "a_a")], self.space4, self.space7,
         DenseMatrix.identity(2), DenseMatrix.identity(2)),
    ]

    for in_data, arg_space, phrase_space, mat_a, mat_b in test_cases:
        comp_model = FullAdditive(A=mat_a, B=mat_b)
        comp_space = comp_model.compose(in_data, arg_space)

        np.testing.assert_array_almost_equal(
            comp_space.cooccurrence_matrix.mat,
            phrase_space.cooccurrence_matrix.mat, 10)

        self.assertListEqual(comp_space.id2column, [])
        self.assertDictEqual(comp_space.column2id, {})
        self.assertListEqual(comp_space.id2row, phrase_space.id2row)
        self.assertDictEqual(comp_space.row2id, phrase_space.row2id)
        self.assertFalse(comp_model._has_intercept)
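As a sanity check on the first two test cases above, here is a minimal numpy sketch outside the DISSECT API, with made-up 2-d vectors: with A = B = I the full additive rule p = A*u + B*v collapses to plain vector addition, and with A = B = 0 every composed vector is the zero vector.

import numpy as np

u = np.array([1.0, 2.0])    # hypothetical vector for "a"
v = np.array([3.0, 4.0])    # hypothetical vector for "b"

I = np.identity(2)
Z = np.zeros((2, 2))

# A = B = I: composition is just u + v
assert np.allclose(I.dot(u) + I.dot(v), u + v)
# A = B = 0: composition maps every argument pair to the zero vector
assert np.allclose(Z.dot(u) + Z.dot(v), np.zeros(2))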
def test_space_train_dense(self):
    test_cases = [
        ([("a", "b", "a_b")], self.space4, self.space5),
        ([("a", "b", "a_b")], self.space4, self.space6),
        ([("a", "b", "a_b"), ("a", "b", "a_a")], self.space4, self.space7),
    ]
    learners = [
        RidgeRegressionLearner(intercept=False, crossvalidation=False, param=0),
        LstsqRegressionLearner(intercept=False),
        LstsqRegressionLearner(intercept=True),
    ]

    for in_data, arg_space, phrase_space in test_cases:
        for learner_ in learners:
            comp_model = FullAdditive(learner=learner_)
            comp_model.train(in_data, arg_space, phrase_space)
            comp_space = comp_model.compose(in_data, arg_space)

            np.testing.assert_array_almost_equal(
                comp_space.cooccurrence_matrix.mat,
                phrase_space.cooccurrence_matrix.mat, 10)

            self.assertListEqual(comp_space.id2column, phrase_space.id2column)
            self.assertDictEqual(comp_space.column2id, phrase_space.column2id)
            self.assertListEqual(comp_space.id2row, phrase_space.id2row)
            self.assertDictEqual(comp_space.row2id, phrase_space.row2id)
            self.assertEqual(comp_model._has_intercept, learner_._intercept)
def test_train1(self):
    test_cases = [
        (self.m11, self.m21, self.ph1, np.mat([[2]]), np.mat([[3]])),
        (self.m11, self.m21, DenseMatrix(np.mat([[0], [0]])),
         np.mat([[0]]), np.mat([[0]])),
    ]

    for m1, m2, ph, expected_a, expected_b in test_cases:
        comp_model = FullAdditive(learner=LstsqRegressionLearner(intercept=False))
        comp_model._train(m1, m2, ph)

        np.testing.assert_array_almost_equal(
            comp_model._mat_a_t.transpose().mat, expected_a, 10)
        np.testing.assert_array_almost_equal(
            comp_model._mat_b_t.transpose().mat, expected_b, 10)
def test_space_compose_sparse(self):
    #WHAT TO DO HERE???
    #PARAMETERS ARE GIVEN AS DENSE MATRICES, INPUT DATA AS SPARSE??
    test_cases = [
        ([("a", "b", "a_b")], self.space1, self.space2,
         DenseMatrix.identity(2), DenseMatrix.identity(2)),
        ([("a", "b", "a_b")], self.space1, self.space3,
         np.mat([[0, 0], [0, 0]]), np.mat([[0, 0], [0, 0]])),
    ]

    for in_data, arg_space, phrase_space, mat_a, mat_b in test_cases:
        comp_model = FullAdditive(A=mat_a, B=mat_b)
        comp_space = comp_model.compose(in_data, arg_space)

        np.testing.assert_array_almost_equal(
            comp_space.cooccurrence_matrix.mat.todense(),
            phrase_space.cooccurrence_matrix.mat.todense(), 10)
def test_full_additive(self):
    self.m12 = DenseMatrix(np.mat([[3, 1], [9, 2]]))
    self.m22 = DenseMatrix(np.mat([[4, 3], [2, 1]]))
    self.ph2 = DenseMatrix(np.mat([[18, 11], [24, 7]]))
    self.row = ["a", "b"]
    self.ft = ["f1", "f2"]
    self.space1 = Space(DenseMatrix(self.m12), self.row, self.ft)
    self.space2 = Space(DenseMatrix(self.ph2), ["a_a", "a_b"], self.ft)

    m = FullAdditive()
    # exporting an untrained model must raise IllegalStateError
    self.assertRaises(IllegalStateError, m.export, self.prefix + ".full1")

    m.train([("a", "b", "a_b"), ("a", "a", "a_a")], self.space1, self.space2)
    m.export(self.prefix + ".full2")
def test_train2(self):
    dim_ = 2
    dim_1 = 3
    dim_2 = 5

    for dim in [dim_1 + dim_2, dim_1 + dim_2 + 2]:
        expected_a = np.mat(np.random.random((dim_, dim_1)))
        expected_b = np.mat(np.random.random((dim_, dim_2)))
        m1 = np.mat(np.random.random((dim, dim_1)))
        m2 = np.mat(np.random.random((dim, dim_2)))
        ph = np.mat(expected_a * m1.T + expected_b * m2.T)

        comp_model = FullAdditive(learner=LstsqRegressionLearner(intercept=False))
        comp_model._train(DenseMatrix(m1), DenseMatrix(m2),
                          DenseMatrix(ph).transpose())

        np.testing.assert_array_almost_equal(
            comp_model._mat_a_t.transpose().mat, expected_a, 10)
        np.testing.assert_array_almost_equal(
            comp_model._mat_b_t.transpose().mat, expected_b, 10)

    for dim in [dim_1 + dim_2 + 6, dim_1 + dim_2 + 20]:
        expected_a = np.mat(np.random.random((dim_, dim_1)))
        expected_b = np.mat(np.random.random((dim_, dim_2)))
        m1 = np.mat(np.random.random((dim, dim_1)))
        m2 = np.mat(np.random.random((dim, dim_2)))
        ph = np.mat(expected_a * m1.T + expected_b * m2.T)

        comp_model = FullAdditive(learner=LstsqRegressionLearner(intercept=True))
        comp_model._train(DenseMatrix(m1), DenseMatrix(m2),
                          DenseMatrix(ph).transpose())

        np.testing.assert_array_almost_equal(
            comp_model._mat_a_t.transpose().mat, expected_a, 10)
        # with an intercept, the last row of _mat_b_t holds the bias term
        np.testing.assert_array_almost_equal(
            comp_model._mat_b_t[:-1, :].transpose().mat, expected_b, 10)
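The two loops above exercise training with and without an intercept. The core idea behind the test is that, since the model assumes p = A*u + B*v, both matrices can be recovered with a single least-squares fit over the horizontally stacked arguments. A minimal numpy sketch of that idea (no intercept, made-up dimensions, not the DISSECT learner):

import numpy as np

rng = np.random.RandomState(0)
dim_, dim_1, dim_2, n = 2, 3, 5, 10
A = rng.rand(dim_, dim_1)
B = rng.rand(dim_, dim_2)

U = rng.rand(n, dim_1)               # argument 1 vectors, one per row
V = rng.rand(n, dim_2)               # argument 2 vectors, one per row
P = U.dot(A.T) + V.dot(B.T)          # phrase vectors, one per row

X = np.hstack([U, V])                # stacked arguments, shape (n, dim_1 + dim_2)
W, _, _, _ = np.linalg.lstsq(X, P, rcond=None)   # solves X * W ~ P
A_hat, B_hat = W[:dim_1].T, W[dim_1:].T

assert np.allclose(A_hat, A) and np.allclose(B_hat, B)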
composed_space = comp_model.compose(test_phrases, space)

print "Reading similarity test data..."
test_similarity_file = data_path + "ML08data_new.txt"
test_pairs = io_utils.read_tuple_list(test_similarity_file, fields=[0, 1])
gold = io_utils.read_list(test_similarity_file, field=2)

print "Computing similarity with lexical function..."
pred = composed_space.get_sims(test_pairs, CosSimilarity())  #use this composed space to assign similarities

print "Scoring lexical function..."
print scoring_utils.score(gold, pred, "spearman")

print "Training Full Additive composition model..."
comp_model = FullAdditive(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print scoring_utils.score(gold, pred, "spearman")

print "Training Weighted Additive composition model..."
comp_model = WeightedAdditive()
comp_model.train(train_data, space, per_space)
print "alpha, beta:", comp_model.alpha, comp_model.beta
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print scoring_utils.score(gold, pred, "spearman")

print "Training Dilation composition model..."
comp_model = Dilation()
from composes.composition.full_additive import FullAdditive
from composes.utils import io_utils

#training data
train_data = [("good", "car", "good_car"),
              ("good", "book", "good_book")
              ]

#load an argument space
arg_space = io_utils.load("./data/out/ex10.pkl")

#load a phrase space
phrase_space = io_utils.load("data/out/PHRASE_SS.ex10.pkl")
print "Training phrase space"
print phrase_space.id2row
print phrase_space.cooccurrence_matrix

#train a FullAdditive model on the data
my_comp = FullAdditive()
my_comp.train(train_data, arg_space, phrase_space)

#print its parameters
print "\nA:", my_comp._mat_a_t.transpose()
print "B:", my_comp._mat_b_t.transpose()

#use the model to compose a new phrase, not seen in training
composed_space = my_comp.compose([("good", "bike", "good_bike")], arg_space)
print "\nComposed space:"
print composed_space.id2row
print composed_space.cooccurrence_matrix
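The composition step in the example above amounts to one matrix-vector product per argument. A minimal numpy sketch of the rule FullAdditive applies when composing "good_bike"; the A, B and argument vectors below are made-up values for illustration, not the parameters the example would actually learn:

import numpy as np

# hypothetical learned parameters (stand-ins for my_comp._mat_a_t.transpose()
# and my_comp._mat_b_t.transpose()) and hypothetical argument vectors
A = np.array([[0.9, 0.1], [0.2, 0.8]])
B = np.array([[1.0, 0.0], [0.0, 1.0]])
v_good = np.array([2.0, 1.0])
v_bike = np.array([0.5, 3.0])

# full additive rule: p = A*u + B*v
v_good_bike = A.dot(v_good) + B.dot(v_bike)
print v_good_bike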