Пример #1
0
class WeightedAdditiveModel(AdditiveModel):

    weighted_additive = None
    new_space = None

    def __init__(self, space, alpha=None, beta=None, no_diff=False):
        AdditiveModel.__init__(self, space, no_diff=no_diff)
        self.weighted_additive = WeightedAdditive(alpha=alpha, beta=beta)

    def fit(self, train_pairs, verbose=False):
        AdditiveModel.fit(self, train_pairs, verbose=verbose)
        if verbose:
            print 'fit: Fitting a weighted additive model on %d pairs' % (len(train_pairs))
        # First, we embed the derived vector into the original space (by simply adding a row)
        vec_space = Space(self.diff_vector, ['pattern_vector'], [])
        self.new_space = Space.vstack(self.space, vec_space)
        #  class is designed to be run on a dataset with different function words (==patterns).
        # We use a dummy function word here.
        train_pairs_ext = [(base, 'pattern_vector', derived) for (base, derived) in train_pairs]
        self.weighted_additive.train(train_pairs_ext, self.new_space, self.new_space)

    def predict(self, base, verbose=False):
        if self.weighted_additive is None:
            raise NameError('Error: Model has not yet been trained')
        composed_space = self.weighted_additive.compose([(base, 'pattern_vector', 'derived')], self.new_space)
        return composed_space.get_row('derived')
Пример #2
0
    def test_train(self):
        """Training should recover the expected alpha/beta from each fixture."""
        cases = [
            (self.m11, self.m21, self.ph1, 2, 3),
            (self.m12, self.m22, self.ph2, 2, 3),
            (self.m11, self.m21, DenseMatrix(np.mat([[0],[0]])), 0, 0),
            (SparseMatrix(self.m12), SparseMatrix(self.m22),
             SparseMatrix(self.ph2), 2, 3),
            (self.m11, DenseMatrix(np.mat([[0],[0]])), self.ph1, 3, 0),
            (DenseMatrix(np.mat([[0],[0]])), self.m11, self.ph1, 0, 3),
            (DenseMatrix(np.mat([[1,2,3]])),
             DenseMatrix(np.mat([[2,4,6]])),
             DenseMatrix(np.mat([[3,6,9]])), 0.6, 1.2),
            (DenseMatrix(np.mat([[0],[0]])),
             DenseMatrix(np.mat([[0],[0]])),
             DenseMatrix(np.mat([[0],[0]])), 0.0, 0.0),
        ]

        # Row labels and training triples, keyed by number of matrix rows.
        rows_by_count = {1: ["a"], 2: ["a", "b"]}
        triples_by_count = {1: [("a", "a", "a")],
                            2: [("a", "a", "a"), ("b", "b", "b")]}

        for mat1, mat2, phrase_mat, want_alpha, want_beta in cases:
            # NOTE(review): all three spaces reuse mat1's row count —
            # presumably mat1/mat2/phrase always have matching shapes here.
            rows = rows_by_count[mat1.shape[0]]
            space_a = Space(mat1, rows, [])
            space_b = Space(mat2, rows, [])
            phrase_space = Space(phrase_mat, rows, [])

            model = WeightedAdditive()
            model.train(triples_by_count[mat1.shape[0]],
                        (space_a, space_b), phrase_space)

            self.assertAlmostEqual(model.alpha, want_alpha, 8)
            self.assertAlmostEqual(model.beta, want_beta, 8)
Пример #3
0
    def test_space_train(self):
        """Train on spaces, then verify compose() reproduces the phrase space."""
        cases = [
            ([("a", "b", "a_b"), ("a", "a", "a_a")],
             self.space1,
             Space(DenseMatrix(np.mat([[12,3],[6,2]])),
                   ["a_b", "a_a"], ["f1", "f2"]),
             1, 1),
            ([("a", "b", "a_b"), ("a", "a", "a_a")],
             self.space1,
             Space(DenseMatrix(np.mat([[0,0],[0,0]])),
                   ["a_b", "a_a"], ["f1", "f2"]),
             0, 0),
            ([("a", "b", "a_b"), ("a", "a", "a_a")],
             self.space1,
             Space(DenseMatrix(np.mat([[0,0],[0,0]])),
                   ["a_b", "a_a"], []),
             0, 0),
            ([("a", "b", "a_b")],
             self.space1,
             Space(DenseMatrix(np.mat([[21,5]])),
                   ["a_b"], []),
             1, 2),
            # NOTE(review): "bla" is not in the argument space; training still
            # yields the same parameters as the single-pair case above.
            ([("a", "b", "a_b"), ("bla", "b", "a_b"), ("a", "bla", "a_b")],
             self.space1,
             Space(DenseMatrix(np.mat([[21,5]])),
                   ["a_b"], []),
             1, 2),
        ]

        for train_data, arg_space, expected_space, want_alpha, want_beta in cases:
            model = WeightedAdditive()
            model.train(train_data, arg_space, expected_space)

            self.assertAlmostEqual(model.alpha, want_alpha, 7)
            self.assertAlmostEqual(model.beta, want_beta, 7)

            composed = model.compose(train_data, arg_space)

            # Composed space must mirror the phrase space's row/column metadata
            # in both directions (id->label and label->id).
            self.assertListEqual(composed.id2row, expected_space.id2row)
            self.assertListEqual(composed.id2column, expected_space.id2column)
            self.assertDictEqual(composed.row2id, expected_space.row2id)
            self.assertDictEqual(composed.column2id, expected_space.column2id)

            np.testing.assert_array_almost_equal(
                composed.cooccurrence_matrix.mat,
                expected_space.cooccurrence_matrix.mat,
                8)
Пример #4
0
    def test_weighted_additive(self):
        """Smoke-test export() on an untrained and a trained WeightedAdditive."""
        self.m12 = DenseMatrix(np.mat([[3, 1], [9, 2]]))
        self.m22 = DenseMatrix(np.mat([[4, 3], [2, 1]]))
        self.ph2 = DenseMatrix(np.mat([[18, 11], [24, 7]]))
        self.row = ["a", "b"]
        self.ft = ["f1", "f2"]
        # NOTE(review): DenseMatrix(self.m12) re-wraps an existing DenseMatrix —
        # presumably a harmless copy; confirm against the DenseMatrix constructor.
        self.space1 = Space(DenseMatrix(self.m12), self.row, self.ft)
        self.space2 = Space(DenseMatrix(self.ph2), ["a_a", "a_b"], self.ft)
        m = WeightedAdditive()
        # Export default (untrained) parameters ...
        m.export(self.prefix + ".add1")
        m.train([("a", "a", "a_a")], self.space1, self.space2)
        # ... then export the parameters learned from a single phrase.
        m.export(self.prefix + ".add2")
Пример #5
0
 def test_weighted_additive(self):
     """Smoke-test export() before and after training a WeightedAdditive."""
     self.m12 = DenseMatrix(np.mat([[3,1],[9,2]]))
     self.m22 = DenseMatrix(np.mat([[4,3],[2,1]]))
     self.ph2 = DenseMatrix(np.mat([[18,11],[24,7]]))
     self.row = ["a", "b"]
     self.ft = ["f1","f2"]
     self.space1 = Space(DenseMatrix(self.m12), self.row, self.ft)
     self.space2 = Space(DenseMatrix(self.ph2), ["a_a","a_b"], self.ft)
     m = WeightedAdditive()
     # Export default (untrained) parameters ...
     m.export(self.prefix + ".add1")
     m.train([("a","a","a_a")], self.space1, self.space2)
     # ... then export the learned alpha/beta.
     m.export(self.prefix + ".add2")
def create_model(model, alpha, beta, lambda_):
    """Instantiate a composition model by name.

    Parameters:
        model: one of "weighted_add", "dilation", "mult".
        alpha, beta: parameters for the weighted additive model.
        lambda_: parameter for the dilation model.

    Returns the corresponding composition model instance.
    Raises ValueError for an unknown model name.
    """
    # TODO: IMPORTANT here: if alpha, beta or lambda are None
    model_dict = {
        "weighted_add": WeightedAdditive,
        "dilation": Dilation,
        "mult": Multiplicative
    }
    # Idiom fix: `x not in d` instead of `not x in d`.
    if model not in model_dict:
        raise ValueError("Invalid model:%s" % model)

    if model == "weighted_add":
        model_obj = WeightedAdditive(alpha, beta)
    elif model == "dilation":
        model_obj = Dilation(lambda_)
    else:
        model_obj = Multiplicative()
    return model_obj
Пример #7
0
    def test_compose(self):
        """_compose should compute alpha * arg1 + beta * arg2."""
        # Explicit weights: 2*m11 + 3*m21 matches the precomputed phrase ph1.
        weighted = WeightedAdditive(2, 3)
        np.testing.assert_array_equal(
            weighted._compose(self.m11, self.m21).mat,
            self.ph1.mat)

        # No weights given: the result is the element-wise mean of the inputs.
        averaged = WeightedAdditive()
        np.testing.assert_array_equal(
            averaged._compose(self.m11, self.m21).mat,
            np.mat([[7/2.],[11/2.]]))

        # Only alpha given: same mean — presumably beta defaults to
        # 1 - alpha when omitted; confirm against WeightedAdditive.__init__.
        half = WeightedAdditive(0.5)
        np.testing.assert_array_equal(
            half._compose(self.m11, self.m21).mat,
            np.mat([[7/2.],[11/2.]]))
Пример #8
0
        els_for_comp.append(element)
    return els_for_comp


# Pipeline: build a typicality space and a distributional space, smooth the
# distributional vectors, compose phrase vectors additively, and correlate
# predicted similarities against the typicality ("gold") space.
typ_space = create_space(TypDmFile, TypRowsFile)
distr_space = create_space(DistrDmFile, DistrRowsFile)

#load a space from a pickle file
#my_space = io_utils.load("./sharp/lexfunc/lexfunc_Ridge_pract.pkl")

#distributional vectors processing: PPMI weighting followed by 300-dim SVD
distr_space = distr_space.apply(PpmiWeighting())
distr_space = distr_space.apply(Svd(300))
#io_utils.save(distr_space, "./spaces/smooth_phrases_ppmi.pkl")

items = items_from_file(itemsFile)
els_for_comp = elements_for_composition(items)

# Plain (unweighted) vector addition: alpha = beta = 1.
my_comp = WeightedAdditive(alpha=1, beta=1)
distr_space = my_comp.compose(els_for_comp, distr_space)

# NOTE(review): this rebinds the name `pairs` (presumably a helper function)
# to its own result, so pairs() cannot be called a second time afterwards.
pairs = pairs(items)

predicted = distr_space.get_sims(pairs, CosSimilarity())
gold = typ_space.get_sims(pairs, CosSimilarity())

#compute correlations (Python 2 print statements)
print "Spearman"
print scoring_utils.score(gold, predicted, "spearman")
print "Pearson"
print scoring_utils.score(gold, predicted, "pearson")
Пример #9
0
#testAnalogy.py
#argv[1]: space pkl file
#argv[2]: analogy test file
#EXAMPLE: python testAnalogy.py ../../spaces/wikipedia.pkl analogy_dataset.txt
#-------
from composes.utils import io_utils
from composes.utils import scoring_utils
from composes.similarity.cos import CosSimilarity
from composes.composition.weighted_additive import WeightedAdditive
import sys

# Analogy demo via vector offset: with the weights below, the guess for
# w1 : w2 :: w3 : ? is the nearest neighbour of (w1 - w2) + 1.2 * w3.
add = WeightedAdditive(alpha = 1, beta = 1.2)
sub = WeightedAdditive(alpha = 1, beta = -1)

#read in a space
space = io_utils.load(sys.argv[1])


def computeAnalogy(w1,w2,w3):
	"""Return the top-1 neighbour of (w1 - w2) + 1.2 * w3 in `space`."""
	# step1 = 1*w1 + (-1)*w2 = w1 - w2
	composed_space = sub.compose([(w1,w2, "step1")], space)
	# step2 = step1 + 1.2*w3; w3 is looked up in the original space.
	composed_space2 = add.compose([("step1", w3, "step2")], (composed_space,space))
	guess=composed_space2.get_neighbours("step2", 1, CosSimilarity(),space)
	return guess


score=0

#read in test file (path from the command line)
fname = sys.argv[2]
f=open(fname,'r')
flines=f.readlines()
Пример #10
0
# 2) a file with short phrases (2 words, e.g. parliamentary potato)
#-------
from composes.utils import io_utils
from composes.utils import scoring_utils
from composes.similarity.cos import CosSimilarity
from composes.composition.weighted_additive import WeightedAdditive
from composes.composition.multiplicative import Multiplicative
from composes.transformation.scaling.row_normalization import RowNormalization
import numpy as np
import sys

#read in a space and length-normalize its rows
my_space = io_utils.load(sys.argv[1])
my_space = my_space.apply(RowNormalization())

# Plain addition and element-wise multiplication composers.
add = WeightedAdditive(alpha = 1, beta = 1)
mult = Multiplicative()


#compute multiplication/addition of a list of word pairs
fname = sys.argv[2]
word_pairs = io_utils.read_tuple_list(fname, fields=[0,1])

lengths=[]
found=True
for wp in word_pairs:
	try:
		v1=my_space.get_row(wp[0])
		v2=my_space.get_row(wp[1])
	except KeyError:
		#print wp[0],"or",wp[1],"not found"
from __future__ import print_function
import sys
from random import randint
from itertools import count
from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive
from composes.semantic_space.space import Space


# Build recipe vectors from ingredient vectors by plain vector addition.
stacked_space = io_utils.load("gastrovec.ppmi.svd20.pkl")

WA = WeightedAdditive(alpha = 1, beta = 1)

# Map recipe name -> list of ingredient tokens; track the largest recipe size.
recipes = {}
max_size = 0
with open("../corpus_collection/composition_counts.txt") as f:
    for line in f:
        words = line.split()
        recipes[words[0]] = words[1:]
        if len(words)-1 > max_size:
            max_size = len(words)-1

# NOTE(review): WA is re-created here with identical parameters — redundant.
WA = WeightedAdditive(alpha = 1, beta = 1)
last_space = None
number = count()
for size in xrange(max_size,1,-1):
    relevant = (rec for rec in recipes if len(recipes[rec]) == size)
    print(size)
    composition = []
    for recipe in relevant:
        old = recipes[recipe]
Пример #12
0
#ex10.py
#-------
from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive

#load a space (pickled semantic space produced by an earlier example)
my_space = io_utils.load("./data/out/ex10.pkl")

print my_space.id2row
print my_space.cooccurrence_matrix

# instantiate a weighted additive model (plain addition: alpha = beta = 1)
my_comp = WeightedAdditive(alpha = 1, beta = 1)

# use the model to compose words in my_space into labeled phrase vectors
composed_space = my_comp.compose([("good", "book", "good_book"),
                                  ("good", "car", "good_car")], 
                                 my_space)

print composed_space.id2row
print composed_space.cooccurrence_matrix

#save the composed space for use as a phrase space in later examples
io_utils.save(composed_space, "data/out/PHRASE_SS.ex10.pkl")


Пример #13
0
# Fragment: evaluate several composition models (lexical function, full
# additive, weighted additive, dilation) on the same train/test data by
# Spearman correlation against gold similarity ratings. Relies on names
# defined earlier in the file (space, per_space, train_data, test_*, gold).
print "Computing similarity with lexical function..."
pred = composed_space.get_sims(test_pairs, CosSimilarity())

#use this composed space to assign similarities
print "Scoring lexical function..."
print scoring_utils.score(gold, pred, "spearman")

print "Training Full Additive composition model..."
comp_model = FullAdditive(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print scoring_utils.score(gold, pred, "spearman")

print "Training Weighted Additive composition model..."
comp_model = WeightedAdditive()
comp_model.train(train_data, space, per_space)
print "alpha, beta:", comp_model.alpha, comp_model.beta
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print scoring_utils.score(gold, pred, "spearman")

print "Training Dilation composition model..."
comp_model = Dilation()
comp_model.train(train_data, space, per_space)
# NOTE(review): reads the private attribute _lambda — no public accessor here.
print "lambda:", comp_model._lambda
composed_space = comp_model.compose(test_phrases, space)
pred = composed_space.get_sims(test_pairs, CosSimilarity())
print scoring_utils.score(gold, pred, "spearman")

print "Multiplicative composition model..."
Пример #14
0
##########################################################################

from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive
from composes.similarity.cos import CosSimilarity
import sys



# Command line: pickle path, base word, word to subtract, word to add.
pkl=sys.argv[1]
base=sys.argv[2]
minus=sys.argv[3]
plus=sys.argv[4]

space = io_utils.load(pkl)

# instantiate an additive and subtractive model
add = WeightedAdditive(alpha = 1, beta = 1)
sub = WeightedAdditive(alpha = 1, beta = -1)


#print space.get_neighbours(base, 10, CosSimilarity())

# step1 = base - minus
print "Subtracting",minus,"from",base
composed_space = sub.compose([(base, minus, "step1")], space)
#print composed_space.get_neighbours("step1", 10, CosSimilarity(),space)

# step2 = step1 + plus; neighbours are searched in the original space.
print "Adding",plus,"..."
composed_space2 = add.compose([("step1", plus, "step2")], (composed_space,space))
print composed_space2.get_neighbours("step2", 10, CosSimilarity(),space)
def ins(lst, el):
    """Insert `el` into `lst`, kept descending-sorted and capped at `num` items.

    `num` is a module-level constant defined elsewhere in this file. When the
    list is full, `el` only displaces the smallest entry if its first field
    (the score) beats the current minimum.
    """
    if len(lst) < num:
        # Room left: just add and re-sort.
        lst.append(el)
    elif el[0] > lst[-1][0]:
        # Full: overwrite the smallest (last) entry, then re-sort.
        lst[-1] = el
    else:
        # Not competitive — leave the list untouched.
        return
    lst.sort(reverse=True)

# Interactive demo: collect ingredient names that exist in the loaded space.
stacked = io_utils.load("gastrovec.ppmi.svd20.pkl")
recicomp = io_utils.load(recipe_space)

WA = WeightedAdditive(alpha = 1, beta = 1)
number = count()

ingredients = []
print("Enter ingredients, enter when done")
while True:
    # raw_input + spaces-to-underscores: tokens in the space use "_" joins.
    ingredient = raw_input("> ").replace(" ","_")
    if ingredient == "":
        break
    if ingredient not in stacked.id2row:
        print("(not found, skipping)")
        continue
    ingredients.append(ingredient)

name = ""
while True:
Пример #16
0
#ex13.py
#-------
from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive

#training data: (word1, word2, phrase-label) triples
train_data = [("good", "car", "good_car"), ("good", "book", "good_book")]

#load an argument space (individual word vectors)
arg_space = io_utils.load("./data/out/ex10.pkl")
print arg_space.id2row
print arg_space.cooccurrence_matrix

#load a phrase space (observed phrase vectors, built in an earlier example)
phrase_space = io_utils.load("data/out/PHRASE_SS.ex10.pkl")
print phrase_space.id2row
print phrase_space.cooccurrence_matrix

#train a weighted additive model on the data (learns alpha and beta)
my_comp = WeightedAdditive()
my_comp.train(train_data, arg_space, phrase_space)

#print its parameters
print "alpha:", my_comp.alpha
print "beta:", my_comp.beta
Пример #17
0
#ex11.py
#-------
from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive

# instantiate a weighted additive model (plain addition: alpha = beta = 1)
my_comp = WeightedAdditive(alpha = 1, beta = 1)

#save it to pickle
io_utils.save(my_comp, "./data/out/model01.pkl")

#print its parameters to a text file via the model's export method
my_comp.export("./data/out/model01.params")

Пример #18
0
 def __init__(self, space, alpha=None, beta=None, no_diff=False):
     """Initialize the base additive model and a WeightedAdditive composer."""
     AdditiveModel.__init__(self, space, no_diff=no_diff)
     self.weighted_additive = WeightedAdditive(alpha=alpha, beta=beta)
Пример #19
0
# 2) a file with short phrases (2 words, e.g. parliamentary potato)
#-------
from composes.utils import io_utils
from composes.utils import scoring_utils
from composes.similarity.cos import CosSimilarity
from composes.composition.weighted_additive import WeightedAdditive
from composes.composition.multiplicative import Multiplicative
from composes.transformation.scaling.row_normalization import RowNormalization
import numpy as np
import sys

#read in a space and length-normalize its rows
my_space = io_utils.load(sys.argv[1])
my_space = my_space.apply(RowNormalization())

# Plain addition and element-wise multiplication composers.
add = WeightedAdditive(alpha=1, beta=1)
mult = Multiplicative()

#compute multiplication/addition of a list of word pairs
fname = sys.argv[2]
word_pairs = io_utils.read_tuple_list(fname, fields=[0, 1])

lengths = []
found = True
for wp in word_pairs:
    try:
        v1 = my_space.get_row(wp[0])
        v2 = my_space.get_row(wp[1])
    except KeyError:
        # Word missing from the space: remember and skip (best-effort).
        #print wp[0],"or",wp[1],"not found"
        found = False
Пример #20
0
# Just exposing the possibility to learn peripheral space if we have a corpus where phrases etc are marked 
# as one token. Then we can use the word2vec_bin_to_DISSECT_dm convertor to generate a similar dm
# print "Creating peripheral space.."
# per_space = PeripheralSpace.build(space,
#                                   format = "dm",
#                                   data ="SOME_PATH_FOR_A_WORD_TO_VEC_PERIPHERAL_SPACE_DATA"
#                                   )
		

# Debug
# print space.cooccurrence_matrix
# print space.id2row


# instantiate a weighted additive model (plain addition: alpha = beta = 1)
my_comp = WeightedAdditive(alpha = 1, beta = 1)

# use the model to compose word pairs from `space` into labeled phrase vectors
composed_space = my_comp.compose([("good", "book", "good_book"),
                                  ("good", "car", "good_car")], 
                                 space)

print composed_space.id2row
print composed_space.cooccurrence_matrix
print composed_space.get_sims([("good_car","good_book")], CosSimilarity()) # Similarity metric

#===============================================================================================================
print "="*80
#===============================================================================================================
##Training Models
Пример #21
0
#ex10.py
#-------
from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive

#load a space (pickled semantic space produced by an earlier example)
my_space = io_utils.load("./data/out/ex10.pkl")

print my_space.id2row
print my_space.cooccurrence_matrix

# instantiate a weighted additive model (plain addition: alpha = beta = 1)
my_comp = WeightedAdditive(alpha = 1, beta = 1)

# use the model to compose words in my_space into labeled phrase vectors
composed_space = my_comp.compose([("good", "book", "good_book"),
                                  ("good", "car", "good_car")],
                                 my_space)

print composed_space.id2row
print composed_space.cooccurrence_matrix

#save the composed space for use as a phrase space in later examples
io_utils.save(composed_space, "data/out/PHRASE_SS.ex10.pkl")


Пример #22
0
#-------
from composes.utils import io_utils
from composes.composition.weighted_additive import WeightedAdditive


#training data: (word1, word2, phrase-label) triples
train_data = [("good", "car", "good_car"),
              ("good", "book", "good_book")
              ]

#load an argument space (individual word vectors)
arg_space = io_utils.load("./data/out/ex10.pkl")
print arg_space.id2row
print arg_space.cooccurrence_matrix

#load a phrase space (observed phrase vectors, built in an earlier example)
phrase_space = io_utils.load("data/out/PHRASE_SS.ex10.pkl")
print phrase_space.id2row
print phrase_space.cooccurrence_matrix

#train a weighted additive model on the data (learns alpha and beta)
my_comp = WeightedAdditive()
my_comp.train(train_data, arg_space, phrase_space)

#print its parameters
print "alpha:", my_comp.alpha
print "beta:", my_comp.beta