Пример #1
0
    def test_train(self):
        """Check the alpha/beta weights learned by ``WeightedAdditive.train``.

        Each scenario is ``(arg1 matrix, arg2 matrix, phrase matrix,
        expected alpha, expected beta)``. All three matrices of a scenario
        are wrapped in ``Space`` objects that share the row labels selected
        by the row count of the first argument matrix.
        """
        def zero_column():
            # A fresh 2x1 all-zero dense matrix for every use.
            return DenseMatrix(np.mat([[0],[0]]))

        scenarios = [
            (self.m11, self.m21, self.ph1, 2, 3),
            (self.m12, self.m22, self.ph2, 2, 3),
            (self.m11, self.m21, zero_column(), 0, 0),
            (SparseMatrix(self.m12), SparseMatrix(self.m22),
             SparseMatrix(self.ph2), 2, 3),
            (self.m11, zero_column(), self.ph1, 3, 0),
            (zero_column(), self.m11, self.ph1, 0, 3),
            (DenseMatrix(np.mat([[1,2,3]])),
             DenseMatrix(np.mat([[2,4,6]])),
             DenseMatrix(np.mat([[3,6,9]])), 0.6, 1.2),
            (zero_column(), zero_column(), zero_column(), 0.0, 0.0),
        ]

        # Row labels and training triples, keyed by the number of matrix rows.
        labels_by_rows = {1:["a"],2:["a", "b"]}
        triples_by_rows = {1:[("a", "a", "a")],
                           2:[("a", "a", "a"), ("b", "b", "b")]}

        for left, right, phrase, want_alpha, want_beta in scenarios:
            learner = WeightedAdditive()

            n_rows = left.shape[0]
            labels = labels_by_rows[n_rows]
            left_space = Space(left, labels, [])
            right_space = Space(right, labels, [])
            target_space = Space(phrase, labels, [])

            # The two argument spaces are handed over as a pair.
            learner.train(triples_by_rows[n_rows],
                          (left_space, right_space),
                          target_space)

            self.assertAlmostEqual(learner.alpha, want_alpha, 8)
            self.assertAlmostEqual(learner.beta, want_beta, 8)
Пример #2
0
class WeightedAdditiveModel(AdditiveModel):
    """Additive model whose two weights are learned by ``WeightedAdditive``.

    ``fit`` adds the parent's ``diff_vector`` to the space as an extra row
    named ``'pattern_vector'`` and trains a ``WeightedAdditive`` model on
    ``(base, 'pattern_vector', derived)`` triples. ``predict`` composes a
    base word with that row.
    """

    # Underlying composition model; instantiated in __init__.
    weighted_additive = None
    # Original space extended with the 'pattern_vector' row; set by fit().
    new_space = None

    def __init__(self, space, alpha=None, beta=None, no_diff=False):
        """Set up the parent model and the wrapped ``WeightedAdditive``.

        :param space: space forwarded to ``AdditiveModel.__init__``.
        :param alpha: forwarded to ``WeightedAdditive`` as ``alpha``.
        :param beta: forwarded to ``WeightedAdditive`` as ``beta``.
        :param no_diff: forwarded to ``AdditiveModel.__init__``.
        """
        AdditiveModel.__init__(self, space, no_diff=no_diff)
        self.weighted_additive = WeightedAdditive(alpha=alpha, beta=beta)

    def fit(self, train_pairs, verbose=False):
        """Fit the parent model, then train the weighted additive model.

        :param train_pairs: iterable of ``(base, derived)`` pairs; must
            support ``len()`` when ``verbose`` is true.
        :param verbose: print a progress line when true.
        """
        # NOTE(review): presumably this call is what sets self.diff_vector --
        # confirm against AdditiveModel.fit.
        AdditiveModel.fit(self, train_pairs, verbose=verbose)
        if verbose:
            # Parenthesised single-argument form prints identically on
            # Python 2 and Python 3.
            print('fit: Fitting a weighted additive model on %d pairs' % len(train_pairs))
        # First, we embed the derived vector into the original space
        # (by simply adding a row).
        vec_space = Space(self.diff_vector, ['pattern_vector'], [])
        self.new_space = Space.vstack(self.space, vec_space)
        # The composition model is trained on (word, word, phrase) triples,
        # so a dummy function word ('pattern_vector') fills the middle slot.
        train_pairs_ext = [(base, 'pattern_vector', derived) for (base, derived) in train_pairs]
        self.weighted_additive.train(train_pairs_ext, self.new_space, self.new_space)

    def predict(self, base, verbose=False):
        """Return the composed vector for ``base`` and the pattern vector.

        :param base: row label of the base word in the fitted space.
        :param verbose: accepted for interface compatibility; unused here.
        :raises NameError: if the model has not been fitted yet.
        """
        # weighted_additive is always set by __init__, so it alone cannot tell
        # whether fit() has run; new_space is only assigned by fit().
        if self.weighted_additive is None or self.new_space is None:
            raise NameError('Error: Model has not yet been trained')
        composed_space = self.weighted_additive.compose([(base, 'pattern_vector', 'derived')], self.new_space)
        return composed_space.get_row('derived')
Пример #3
0
 def test_weighted_additive(self):
     """Export a ``WeightedAdditive`` model before and after training."""
     row_labels = ["a", "b"]
     feature_labels = ["f1","f2"]

     # Fixture matrices are kept on the test instance.
     self.m12 = DenseMatrix(np.mat([[3,1],[9,2]]))
     self.m22 = DenseMatrix(np.mat([[4,3],[2,1]]))
     self.ph2 = DenseMatrix(np.mat([[18,11],[24,7]]))
     self.row = row_labels
     self.ft = feature_labels

     # Argument space and phrase space built from the fixtures above.
     self.space1 = Space(DenseMatrix(self.m12), self.row, self.ft)
     self.space2 = Space(DenseMatrix(self.ph2), ["a_a","a_b"], self.ft)

     model = WeightedAdditive()
     # Export once untrained, then again after training on a single triple.
     model.export(self.prefix + ".add1")
     model.train([("a","a","a_a")], self.space1, self.space2)
     model.export(self.prefix + ".add2")
Пример #4
0
    def test_weighted_additive(self):
        """Export a ``WeightedAdditive`` model untrained and then trained."""
        # Fixture data stored on the test instance.
        self.m12 = DenseMatrix(np.mat([[3, 1], [9, 2]]))
        self.m22 = DenseMatrix(np.mat([[4, 3], [2, 1]]))
        self.ph2 = DenseMatrix(np.mat([[18, 11], [24, 7]]))
        self.row, self.ft = ["a", "b"], ["f1", "f2"]

        phrase_rows = ["a_a", "a_b"]
        self.space1 = Space(DenseMatrix(self.m12), self.row, self.ft)
        self.space2 = Space(DenseMatrix(self.ph2), phrase_rows, self.ft)

        training_triples = [("a", "a", "a_a")]
        composer = WeightedAdditive()

        # First export happens before any training.
        composer.export(self.prefix + ".add1")
        composer.train(training_triples, self.space1, self.space2)
        # Second export happens after training.
        composer.export(self.prefix + ".add2")
Пример #5
0
    def test_space_train(self):
        """Train on Space inputs, then compare weights and composed output.

        Each scenario is ``(training triples, argument space, phrase space,
        expected alpha, expected beta)``. After training, composing the same
        triples must reproduce the labels and matrix of the phrase space.
        """
        def phrase_space_of(values, row_ids, col_ids):
            # Dense phrase space built from a nested list of values.
            return Space(DenseMatrix(np.mat(values)), row_ids, col_ids)

        def two_phrases():
            # Fresh list per scenario, so no list is shared between cases.
            return [("a", "b", "a_b"), ("a", "a", "a_a")]

        scenarios = [
            (two_phrases(), self.space1,
             phrase_space_of([[12,3],[6,2]], ["a_b", "a_a"], ["f1", "f2"]),
             1, 1),
            (two_phrases(), self.space1,
             phrase_space_of([[0,0],[0,0]], ["a_b", "a_a"], ["f1", "f2"]),
             0, 0),
            (two_phrases(), self.space1,
             phrase_space_of([[0,0],[0,0]], ["a_b", "a_a"], []),
             0, 0),
            ([("a", "b", "a_b")], self.space1,
             phrase_space_of([[21,5]], ["a_b"], []),
             1, 2),
            ([("a", "b", "a_b"), ("bla", "b", "a_b"), ("a", "bla", "a_b")],
             self.space1,
             phrase_space_of([[21,5]], ["a_b"], []),
             1, 2),
        ]

        for triples, words, expected, want_alpha, want_beta in scenarios:
            composer = WeightedAdditive()
            composer.train(triples, words, expected)

            self.assertAlmostEqual(composer.alpha, want_alpha, places=7)
            self.assertAlmostEqual(composer.beta, want_beta, places=7)

            composed = composer.compose(triples, words)

            # Labels first (list form, then lookup-dict form) ...
            self.assertListEqual(composed.id2row, expected.id2row)
            self.assertListEqual(composed.id2column, expected.id2column)
            self.assertDictEqual(composed.row2id, expected.row2id)
            self.assertDictEqual(composed.column2id, expected.column2id)

            # ... then the numeric content.
            np.testing.assert_array_almost_equal(
                composed.cooccurrence_matrix.mat,
                expected.cooccurrence_matrix.mat,
                decimal=8)
Пример #6
0
#ex13.py
#-------
from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive

# Training data: (first word, second word, phrase) triples.
train_data = [("good", "car", "good_car"), ("good", "book", "good_book")]

# Load an argument space and show its rows and matrix.
# NOTE(review): the .pkl files are presumably unpickled by io_utils.load --
# only load files from a trusted source.
arg_space = io_utils.load("./data/out/ex10.pkl")
print(arg_space.id2row)
print(arg_space.cooccurrence_matrix)

# Load a phrase space and show its rows and matrix.
phrase_space = io_utils.load("data/out/PHRASE_SS.ex10.pkl")
print(phrase_space.id2row)
print(phrase_space.cooccurrence_matrix)

# Train a weighted additive model on the data.
my_comp = WeightedAdditive()
my_comp.train(train_data, arg_space, phrase_space)

# Print its parameters. Single-argument print(...) with %-formatting gives
# the same output on Python 2 and Python 3.
print("alpha: %s" % (my_comp.alpha,))
print("beta: %s" % (my_comp.beta,))
Пример #7
0
## Demonstrating on a smaller vector space for performance and demo purposes.
## The word2vec space can also be used, as it is just another object of the
## same class.
# NOTE(review): `dissect_toy_data` and the `train_data` used for training
# below are not defined in this fragment -- presumably set earlier in the
# file; confirm.

# Load an argument space and show its rows and matrix.
arg_space = io_utils.load(dissect_toy_data+"out/ex10.pkl")
print(arg_space.id2row)
print(arg_space.cooccurrence_matrix)

# Load a phrase space and show its rows and matrix.
phrase_space = io_utils.load(dissect_toy_data+"out/PHRASE_SS.ex10.pkl")
print(phrase_space.id2row)
print(phrase_space.cooccurrence_matrix)

# Train a weighted additive model on the data.
my_comp = WeightedAdditive()
my_comp.train(train_data, arg_space, phrase_space)

# Print its parameters. Single-argument print(...) with %-formatting gives
# the same output on Python 2 and Python 3.
print("Weight Additive Model :: ")
print("alpha: %s" % (my_comp.alpha,))
print("beta: %s" % (my_comp.beta,))

#===============================================================================================================
print("="*80)
print("Time Dilation model")
#===============================================================================================================

# Training data: (first word, second word, phrase) triples.
train_data = [("good", "car", "good_car"),
              ("good", "book", "good_book")
              ]