/
sp_scorer.py
97 lines (81 loc) · 5.46 KB
/
sp_scorer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import theano, numpy
from theano import tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
class SPScorer(object):
    """Selectional-preference scorer over predicate-argument tuples.

    Learns dense embeddings for predicates and arguments jointly with two
    heads, both built as Theano symbolic graphs at construction time:

      * a linear ``scorer`` trained with a max-margin ranking loss that
        pushes a real (predicate, args) tuple above a fully random tuple, and
      * an ``indicator`` trained with squared error to output ~0 for real
        argument tuples and ~1 when the arguments are randomly corrupted
        (keeping the true predicate).

    All training/evaluation entry points are compiled Theano functions
    returned by the ``get_*`` methods; they take one predicate index
    followed by ``numargs`` argument indices.
    """

    def __init__(self, numargs, embed_size, pred_vocab_size, arg_vocab_size, initial_pred_rep=None, initial_arg_rep=None, margin=5, lr=0.01, activation=T.nnet.sigmoid):
        """Build the symbolic graphs for scoring, corruption and losses.

        Args:
            numargs: number of argument slots per predicate tuple.
            embed_size: dimensionality of predicate and argument embeddings.
            pred_vocab_size: number of distinct predicates.
            arg_vocab_size: number of distinct arguments.
            initial_pred_rep: optional (pred_vocab_size, embed_size) array
                of pretrained predicate embeddings.
            initial_arg_rep: optional (arg_vocab_size, embed_size) array
                of pretrained argument embeddings.
            margin: ranking-loss margin between real and random tuples.
            lr: SGD learning rate used by the training functions.
            activation: elementwise nonlinearity applied to the concatenated
                predicate-argument representation.
        """
        numpy_rng = numpy.random.RandomState(12345)  # deterministic init
        theano_rng = RandomStreams(54321)            # symbolic corruption sampling
        self.lr = lr
        # Initializing predicate representations
        if initial_pred_rep is not None:
            num_preds, pred_dim = initial_pred_rep.shape
            # Bug fix: original asserted against undefined name `num_arrays`,
            # raising NameError whenever a pretrained matrix was supplied.
            assert pred_vocab_size == num_preds, "Initial predicate representation is not the same size as pred_vocab_size"
            assert embed_size == pred_dim, "Initial predicate representation does not have the same dimensionality as embed_size"
        else:
            # Glorot-style uniform range (x4, as for sigmoid activations)
            initial_pred_rep_range = 4 * numpy.sqrt(6. / (pred_vocab_size + embed_size))
            initial_pred_rep = numpy.asarray(numpy_rng.uniform(low=-initial_pred_rep_range, high=initial_pred_rep_range, size=(pred_vocab_size, embed_size)))
        self.pred_rep = theano.shared(value=initial_pred_rep, name='P')
        # Initializing argument representations
        if initial_arg_rep is not None:
            arg_rep_len, arg_dim = initial_arg_rep.shape
            assert arg_vocab_size == arg_rep_len, "Initial argument representation is not the same size as arg_vocab_size"
            assert embed_size == arg_dim, "Initial argument representation does not have the same dimensionality as embed_size"
        else:
            initial_arg_rep_range = 4 * numpy.sqrt(6. / (arg_vocab_size + embed_size))
            initial_arg_rep = numpy.asarray(numpy_rng.uniform(low=-initial_arg_rep_range, high=initial_arg_rep_range, size=(arg_vocab_size, embed_size)))
        self.arg_rep = theano.shared(value=initial_arg_rep, name='A')
        # Initialize scorer: one weight vector over the concatenated tuple
        scorer_dim = embed_size * (numargs + 1)  # Predicate is +1
        initial_scorer_range = 4 * numpy.sqrt(6. / scorer_dim)
        initial_scorer = numpy.asarray(numpy_rng.uniform(low=-initial_scorer_range, high=initial_scorer_range, size=scorer_dim))
        self.scorer = theano.shared(value=initial_scorer, name='s')
        # Initialize indicator: maps the tuple to one value per argument slot
        indicator_dim = embed_size * (numargs + 1)  # Predicate is +1
        initial_indicator_range = 4 * numpy.sqrt(6. / (indicator_dim + numargs))
        initial_indicator = numpy.asarray(numpy_rng.uniform(low=-initial_indicator_range, high=initial_indicator_range, size=(indicator_dim, numargs)))
        self.indicator = theano.shared(value=initial_indicator, name='I')
        # Define symbolic pred-arg: look up and concatenate into a 1 x dim row
        self.pred_ind = T.iscalar('p')
        self.arg_inds = T.iscalars(numargs)
        pred = self.pred_rep[self.pred_ind].reshape((1, embed_size))
        args = self.arg_rep[self.arg_inds].reshape((1, embed_size * numargs))
        pred_arg = activation(T.concatenate([pred, args], axis=1))
        # Define symbolic rand pred-arg (negative sample) for training scorer
        rand_pred_ind = theano_rng.random_integers(low=0, high=pred_vocab_size - 1)
        rand_arg_inds = theano_rng.random_integers([1, numargs], low=0, high=arg_vocab_size - 1)
        rand_pred = self.pred_rep[rand_pred_ind].reshape((1, embed_size))
        rand_args = self.arg_rep[rand_arg_inds].reshape((1, embed_size * numargs))
        rand_pred_arg = activation(T.concatenate([rand_pred, rand_args], axis=1))
        # Define symbolic pred_rand-arg (true predicate, corrupted args)
        # for training indicator
        pred_rand_arg = activation(T.concatenate([pred, rand_args], axis=1))
        # Define scores and max-margin ranking loss
        self.corr_score = T.sum(T.dot(pred_arg, self.scorer))
        rand_score = T.sum(T.dot(rand_pred_arg, self.scorer))
        self.margin_loss = T.maximum(0, margin - self.corr_score + rand_score)
        # Define indicator targets (0 = real args, 1 = corrupted) and MSE loss
        orig_ind_labels = T.constant(numpy.zeros(numargs))
        self.indicator_pred = T.nnet.sigmoid(T.dot(pred_arg, self.indicator))
        rand_ind_labels = T.constant(numpy.ones(numargs))
        rand_indicator_pred = T.nnet.sigmoid(T.dot(pred_rand_arg, self.indicator))
        self.indicator_loss = T.mean((self.indicator_pred - orig_ind_labels) ** 2) + T.mean((rand_indicator_pred - rand_ind_labels) ** 2)
        # Define params and inputs; embeddings are updated by both objectives
        self.score_params = [self.pred_rep, self.arg_rep, self.scorer]
        self.indicator_params = [self.pred_rep, self.arg_rep, self.indicator]
        self.score_ind_inputs = [self.pred_ind] + list(self.arg_inds)

    def get_score_train_function(self):
        """Compile one SGD step on the margin loss; returns the loss value."""
        gparams = T.grad(self.margin_loss, self.score_params)
        updates = []
        for param, gparam in zip(self.score_params, gparams):
            updates.append((param, param - self.lr * gparam))
        return theano.function(self.score_ind_inputs, self.margin_loss, updates=updates)

    def get_indicator_train_function(self):
        """Compile one SGD step on the indicator loss; returns the loss value."""
        gparams = T.grad(self.indicator_loss, self.indicator_params)
        updates = []
        for param, gparam in zip(self.indicator_params, gparams):
            updates.append((param, param - self.lr * gparam))
        return theano.function(self.score_ind_inputs, self.indicator_loss, updates=updates)

    def get_score_function(self):
        """Compile a function returning the tuple's scalar plausibility score."""
        return theano.function(self.score_ind_inputs, self.corr_score)

    def get_indicator_function(self):
        """Compile a function returning per-argument corruption probabilities."""
        return theano.function(self.score_ind_inputs, self.indicator_pred)