def test_train(self):
    """Train ``WeightedAdditive`` on small matrices and check its weights.

    Each scenario is ``(arg1 matrix, arg2 matrix, phrase matrix,
    expected alpha, expected beta)``. The matrices are wrapped into
    ``Space`` objects whose row labels are selected by the row count of
    the first argument matrix, and the learned ``alpha``/``beta`` are
    compared to the expected values to 8 decimal places.

    The ``self.m11``/``self.m21``/``self.ph1`` (and ``...2``) fixtures are
    defined outside this method.
    """
    def zero_column():
        # A fresh 2x1 all-zero dense matrix for every use.
        return DenseMatrix(np.mat([[0], [0]]))

    scenarios = [
        (self.m11, self.m21, self.ph1, 2, 3),
        (self.m12, self.m22, self.ph2, 2, 3),
        (self.m11, self.m21, zero_column(), 0, 0),
        (SparseMatrix(self.m12), SparseMatrix(self.m22),
         SparseMatrix(self.ph2), 2, 3),
        (self.m11, zero_column(), self.ph1, 3, 0),
        (zero_column(), self.m11, self.ph1, 0, 3),
        (DenseMatrix(np.mat([[1, 2, 3]])),
         DenseMatrix(np.mat([[2, 4, 6]])),
         DenseMatrix(np.mat([[3, 6, 9]])), 0.6, 1.2),
        (zero_column(), zero_column(), zero_column(), 0.0, 0.0),
    ]

    # Row labels and training triples, keyed by the number of rows (1 or 2).
    labels_by_rows = {1: ["a"], 2: ["a", "b"]}
    triples_by_rows = {
        1: [("a", "a", "a")],
        2: [("a", "a", "a"), ("b", "b", "b")],
    }

    for left, right, phrase, want_alpha, want_beta in scenarios:
        # All three spaces share the labels picked from the first matrix.
        n_rows = left.shape[0]
        row_labels = labels_by_rows[n_rows]

        model = WeightedAdditive()
        arg_spaces = (Space(left, row_labels, []),
                      Space(right, row_labels, []))
        phrase_space = Space(phrase, row_labels, [])

        model.train(triples_by_rows[n_rows], arg_spaces, phrase_space)

        self.assertAlmostEqual(model.alpha, want_alpha, 8)
        self.assertAlmostEqual(model.beta, want_beta, 8)
class WeightedAdditiveModel(AdditiveModel):
    """Additive model whose prediction is made by a ``WeightedAdditive`` composition.

    ``fit`` first runs ``AdditiveModel.fit``, then appends ``self.diff_vector``
    to ``self.space`` as an extra row named ``'pattern_vector'`` and trains a
    ``WeightedAdditive`` model on ``(base, 'pattern_vector', derived)``
    triples. ``predict`` composes a base word with that row.

    NOTE(review): ``self.space`` and ``self.diff_vector`` are presumably set
    by ``AdditiveModel`` -- confirm against the base class.
    """

    # Composition model; replaced with a WeightedAdditive instance in __init__.
    weighted_additive = None
    # Space extended with the 'pattern_vector' row; stays None until fit() runs.
    new_space = None

    def __init__(self, space, alpha=None, beta=None, no_diff=False):
        """Initialise the base model and create the composition model.

        ``space`` and ``no_diff`` are forwarded to ``AdditiveModel.__init__``;
        ``alpha`` and ``beta`` are forwarded to ``WeightedAdditive``.
        """
        AdditiveModel.__init__(self, space, no_diff=no_diff)
        self.weighted_additive = WeightedAdditive(alpha=alpha, beta=beta)

    def fit(self, train_pairs, verbose=False):
        """Fit the base model, then train the weighted additive model.

        ``train_pairs`` is a sequence of ``(base, derived)`` pairs. With
        ``verbose`` set, a progress line is printed.
        """
        AdditiveModel.fit(self, train_pairs, verbose=verbose)
        if verbose:
            # Single parenthesised expression: same output as the print statement.
            print('fit: Fitting a weighted additive model on %d pairs' % len(train_pairs))

        # First, embed the derived vector into the original space by simply
        # adding it as one more row.
        vec_space = Space(self.diff_vector, ['pattern_vector'], [])
        self.new_space = Space.vstack(self.space, vec_space)

        # WeightedAdditive is designed for datasets with different function
        # words (== patterns); a single dummy function word is used here.
        train_pairs_ext = [(base, 'pattern_vector', derived)
                           for (base, derived) in train_pairs]
        self.weighted_additive.train(train_pairs_ext, self.new_space, self.new_space)

    def predict(self, base, verbose=False):
        """Return the composed row for ``base`` combined with the pattern row.

        ``verbose`` is accepted but not used here.

        Raises:
            NameError: if ``fit`` has not been called yet.
        """
        # weighted_additive is always set by __init__, so checking it alone can
        # never detect an unfitted model; new_space is what fit() provides.
        if self.weighted_additive is None or self.new_space is None:
            raise NameError('Error: Model has not yet been trained')
        composed_space = self.weighted_additive.compose(
            [(base, 'pattern_vector', 'derived')], self.new_space)
        return composed_space.get_row('derived')
def test_weighted_additive(self):
    """Export a ``WeightedAdditive`` model before and after training.

    The untrained model is exported to ``self.prefix + ".add1"``; after
    training on the single triple ``("a", "a", "a_a")`` it is exported again
    to ``self.prefix + ".add2"``. The method itself makes no assertions.
    """
    word_rows = ["a", "b"]
    feature_names = ["f1", "f2"]

    # Fixture data stored on the test instance.
    self.m12 = DenseMatrix(np.mat([[3, 1], [9, 2]]))
    self.m22 = DenseMatrix(np.mat([[4, 3], [2, 1]]))
    self.ph2 = DenseMatrix(np.mat([[18, 11], [24, 7]]))
    self.row = word_rows
    self.ft = feature_names

    # Argument space and phrase space share the same feature list.
    self.space1 = Space(DenseMatrix(self.m12), word_rows, feature_names)
    self.space2 = Space(DenseMatrix(self.ph2), ["a_a", "a_b"], feature_names)

    model = WeightedAdditive()
    model.export(self.prefix + ".add1")
    model.train([("a", "a", "a_a")], self.space1, self.space2)
    model.export(self.prefix + ".add2")
def test_weighted_additive(self):
    """Exercise ``WeightedAdditive.export`` on an untrained and a trained model.

    Output goes to ``self.prefix + ".add1"`` (before ``train``) and
    ``self.prefix + ".add2"`` (after ``train``). Nothing is asserted here.
    """
    argument_matrix = DenseMatrix(np.mat([[3, 1], [9, 2]]))
    second_matrix = DenseMatrix(np.mat([[4, 3], [2, 1]]))
    phrase_matrix = DenseMatrix(np.mat([[18, 11], [24, 7]]))

    # Keep the fixtures on the instance under their usual names.
    self.m12, self.m22, self.ph2 = argument_matrix, second_matrix, phrase_matrix
    self.row, self.ft = ["a", "b"], ["f1", "f2"]
    self.space1 = Space(DenseMatrix(argument_matrix), self.row, self.ft)
    self.space2 = Space(DenseMatrix(phrase_matrix), ["a_a", "a_b"], self.ft)

    untrained_path = self.prefix + ".add1"
    trained_path = self.prefix + ".add2"
    training_triples = [("a", "a", "a_a")]

    m = WeightedAdditive()
    m.export(untrained_path)
    m.train(training_triples, self.space1, self.space2)
    m.export(trained_path)
def test_space_train(self):
    """Train ``WeightedAdditive`` from spaces and compare the composed result.

    Each scenario is ``(training triples, argument space, phrase space,
    expected alpha, expected beta)``. After training, the learned weights
    are checked to 7 decimal places; the same triples are then composed and
    the resulting space is compared with the phrase space (row/column
    labels, index maps, and matrix values to 8 decimal places).
    """
    scenarios = [
        (
            [("a", "b", "a_b"), ("a", "a", "a_a")],
            self.space1,
            Space(DenseMatrix(np.mat([[12, 3], [6, 2]])),
                  ["a_b", "a_a"], ["f1", "f2"]),
            1, 1,
        ),
        (
            [("a", "b", "a_b"), ("a", "a", "a_a")],
            self.space1,
            Space(DenseMatrix(np.mat([[0, 0], [0, 0]])),
                  ["a_b", "a_a"], ["f1", "f2"]),
            0, 0,
        ),
        (
            [("a", "b", "a_b"), ("a", "a", "a_a")],
            self.space1,
            Space(DenseMatrix(np.mat([[0, 0], [0, 0]])),
                  ["a_b", "a_a"], []),
            0, 0,
        ),
        (
            [("a", "b", "a_b")],
            self.space1,
            Space(DenseMatrix(np.mat([[21, 5]])), ["a_b"], []),
            1, 2,
        ),
        (
            # Same expected weights as the single-triple case above;
            # presumably triples naming unknown rows ("bla") are skipped --
            # TODO confirm against WeightedAdditive.train.
            [("a", "b", "a_b"), ("bla", "b", "a_b"), ("a", "bla", "a_b")],
            self.space1,
            Space(DenseMatrix(np.mat([[21, 5]])), ["a_b"], []),
            1, 2,
        ),
    ]

    for triples, arguments, phrases, want_alpha, want_beta in scenarios:
        model = WeightedAdditive()
        model.train(triples, arguments, phrases)

        self.assertAlmostEqual(model.alpha, want_alpha, 7)
        self.assertAlmostEqual(model.beta, want_beta, 7)

        composed = model.compose(triples, arguments)

        # Label lists first, then the label -> index maps.
        for attr in ("id2row", "id2column"):
            self.assertListEqual(getattr(composed, attr),
                                 getattr(phrases, attr))
        for attr in ("row2id", "column2id"):
            self.assertDictEqual(getattr(composed, attr),
                                 getattr(phrases, attr))

        np.testing.assert_array_almost_equal(
            composed.cooccurrence_matrix.mat,
            phrases.cooccurrence_matrix.mat,
            8)
# ex13.py
# -------
# Train a weighted additive composition model on two phrases and print the
# learned weights.
from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive

# training data: (first word, second word, phrase) triples
train_data = [
    ("good", "car", "good_car"),
    ("good", "book", "good_book"),
]

# load the argument space, then show its row labels and matrix
arg_space = io_utils.load("./data/out/ex10.pkl")
print(arg_space.id2row)
print(arg_space.cooccurrence_matrix)

# load the phrase space, then show its row labels and matrix
phrase_space = io_utils.load("data/out/PHRASE_SS.ex10.pkl")
print(phrase_space.id2row)
print(phrase_space.cooccurrence_matrix)

# train a weighted additive model on the data
my_comp = WeightedAdditive()
my_comp.train(train_data, arg_space, phrase_space)

# print its parameters (label, one space, then the value)
print("alpha: %s" % (my_comp.alpha,))
print("beta: %s" % (my_comp.beta,))
# The demo below runs on a smaller vector space, for performance and for
# demonstration purposes. The word2vec space can be used as well, since it
# is just another object of the same class.

# load the argument space, then show its row labels and matrix
arg_space = io_utils.load(dissect_toy_data + "out/ex10.pkl")
print(arg_space.id2row)
print(arg_space.cooccurrence_matrix)

# load the phrase space, then show its row labels and matrix
phrase_space = io_utils.load(dissect_toy_data + "out/PHRASE_SS.ex10.pkl")
print(phrase_space.id2row)
print(phrase_space.cooccurrence_matrix)

# train a weighted additive model on the data
my_comp = WeightedAdditive()
my_comp.train(train_data, arg_space, phrase_space)

# print its parameters (label, one space, then the value)
print("Weight Additive Model :: ")
print("alpha: %s" % (my_comp.alpha,))
print("beta: %s" % (my_comp.beta,))

# =============================================================================
print("=" * 80)
print("Time Dilation model")
# =============================================================================

# training data for the next section
train_data = [
    ("good", "car", "good_car"),
    ("good", "book", "good_book"),
]