# training.py
from helpers import sigmoid
from globals import *  # expected to provide k (number of transforms) and d (embedding dimension)
import numpy as np
import random

def split_pos_neg(data):
    '''
    Splits the training data into two sets: one with positive
    examples and one with negative examples.
    '''
    pos_data = [(q, h, t) for q, h, t in data if t == 1]
    neg_data = [(q, h, t) for q, h, t in data if t == 0]
    return pos_data, neg_data
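
# Example (hypothetical data; q and h are d-dimensional numpy embeddings
# and t is a 0/1 relevance label):
#   data = [(np.ones(2), np.ones(2), 1), (np.ones(2), np.zeros(2), 0)]
#   pos, neg = split_pos_neg(data)  # len(pos) == 1, len(neg) == 1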

def select_batch(pos_data, neg_data, num_pos=32, num_neg=32*5):
    '''
    Selects a random batch of positive and negative examples from the
    training data. Sample sizes are capped at the available data so
    random.sample never raises.
    '''
    training_batch = []
    training_batch += random.sample(pos_data, min(num_pos, len(pos_data)))
    if neg_data:
        training_batch += random.sample(neg_data, min(num_neg, len(neg_data)))
    return training_batch
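
# With the defaults, a batch holds up to 32 positives and 160 negatives,
# preserving a 1:5 positive-to-negative ratio (pos_data/neg_data as
# returned by split_pos_neg):
#   batch = select_batch(pos_data, neg_data)         # up to 192 examples
#   batch = select_batch(pos_data, neg_data, 8, 40)  # smaller 1:5 batch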

def one_iter(examples, transforms, b, learning_rate=0.01):
    '''
    Runs one pass of stochastic gradient descent over all provided
    examples and returns the accumulated cross-entropy loss.
    '''
    total_loss = 0
    for q, h, t in examples:
        # Project the query with each of the k projection matrices.
        p = [np.dot(transforms[i], q) for i in range(k)]
        # Normalize the projections to unit length.
        for i in range(k):
            p[i] = p[i] / np.sqrt(np.sum(p[i]**2))
        # Compute similarities between all projections and the candidate.
        s = np.dot(p, h)
        # Apply the sigmoid and pick the transformation whose projection
        # came closest to the candidate.
        sigmoids = sigmoid(np.add(s, b))
        maxsim = np.argmax(sigmoids)
        # y: predicted probability that the candidate answers the query
        y = sigmoids[maxsim]
        if y == 0 or y == 1:
            # A saturated sigmoid would make log(y) or log(1 - y)
            # undefined, so skip the update for this example.
            print("Saturated prediction, skipping update")
            continue
        # Binary cross-entropy loss for this example.
        loss = -(t * np.log(y) + (1 - t) * np.log(1 - y))
        total_loss += loss
        # Gradient descent step on the winning projection matrix.
        # Treating the normalization of p as constant, s = h . (W q)
        # gives dL/dW = (y - t) * outer(h, q).
        gradient = (y - t) * np.outer(h, q)
        transforms[maxsim] = transforms[maxsim] - learning_rate * gradient
        # Update the matching bias with dL/db = y - t.
        b[maxsim] = b[maxsim] - learning_rate * (y - t)
    return total_loss
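
# A minimal inference sketch (a hypothetical helper, not defined elsewhere
# in this repo): scores one candidate h against a query q by mirroring the
# forward pass in one_iter and taking the best-matching transform.
def predict(q, h, transforms, b):
    p = [np.dot(W, q) for W in transforms]
    p = [v / np.sqrt(np.sum(v**2)) for v in p]
    return np.max(sigmoid(np.dot(p, h) + b))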

def train(embeddings, train_examples, num_iter=10, batching=False, learning_rate=0.01):
    '''
    The main training algorithm. Learns k projection matrices (phi) and
    their biases from (query, candidate, label) triples. The embeddings
    argument is currently unused.
    '''
    rounds_per_batch = 8
    if batching:
        pos_examples, neg_examples = split_pos_neg(train_examples)
    # Initialize each projection matrix near the identity.
    transforms = []
    for i in range(k):
        transforms.append(np.identity(d) + np.random.normal(0, 0.5, [d, d]))
    b = np.zeros(k)  # one bias per transform
    for it in range(num_iter):
        if batching:
            train_examples = select_batch(pos_examples, neg_examples)
        for _ in range(rounds_per_batch):
            loss = one_iter(train_examples, transforms, b, learning_rate)
            print("iter", it, "avg loss:", loss / len(train_examples))
    return (transforms, b)
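
# Usage sketch (hypothetical smoke test; assumes globals defines small
# values of d and k, and that helpers.sigmoid is vectorized over arrays):
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    data = [(rng.normal(size=d), rng.normal(size=d), i % 2) for i in range(64)]
    transforms, b = train(None, data, num_iter=2)
    print("score:", predict(data[0][0], data[0][1], transforms, b))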