/
training.py
92 lines (74 loc) · 2.88 KB
/
training.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#! /usr/bin/env python
''' Training module (mini-batch) '''
import numpy as np
import theano
from theano import tensor as T
from model import AutoEncoder
import pdb
import resource
#rsrc = resource.RLIMIT_DATA
#soft, hard = resource.getrlimit(rsrc)
#print 'Soft limit starts as :', soft
#resource.setrlimit(rsrc, (10000000, hard))
#soft, hard = resource.getrlimit(rsrc)
#print 'Soft limit changed to :', soft
class trainAE(object):
def __init__(self, path, k, lr= 0.1, batch_size=1, loss='bce', n_epochs=100):
'''
Arguments:
path : path to training data
k : hidde unit's dimension
lr : learning rate
batch_size : batch_size for training, currently set to 1
loss : loss function (bce, rmse) to train AutoEncoder
n_epochs : number of epochs for training
'''
self.AE = AutoEncoder(path, k)
# Definne the autoencoder model
#self.AE.model()
self.AE.model_batch()
self.epochs = n_epochs
def train(self):
T = self.AE.T
# COnverting to csr format for indexing
T = T.tocsr()
#pdb.set_trace()
nonzero_indices = T.nonzero()
for epoch in xrange(self.epochs):
print("Running epoch %d"%(epoch))
for i in np.unique(nonzero_indices[0]):
# get indices of observed values from the user 'i' 's vector
indices = T[i, :].nonzero()[1]
#print indices
#indices = indices.reshape(indices.shape[0],)
# Get correspoding ratings
ratings = T[i, indices].toarray()
#print ratings
ratings = ratings.reshape(ratings.shape[1],)
# Convert inputs to theano datatype
indices = indices.astype(np.int32)
ratings = ratings.astype(np.int32)
#pdb.set_trace()
loss = self.AE.ae(indices, ratings)
print("Loss at epoch %d is %f"%(epoch, loss))
# Batch training method
def train_batch(self, batch_size):
T = self.AE.T
T = T.tocsr()
nonzero_indices = T.nonzero()
#pdb.set_trace()
n_users = len(np.unique(nonzero_indices[0]))
indices = np.unique(nonzero_indices[0])
for epoch in xrange(self.epochs):
for ind, i in enumerate(xrange(0, n_users, batch_size)):
ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
#print ratings
#pdb.set_trace()
loss = self.AE.ae_batch(ratings)
#loss = self.AE.debug(ratings)
print loss
#pdb.set_trace()
#print("Loss for epoch %d batch %d is %f"%(epoch, ind, loss))
if __name__ == "__main__":
autoencoder = trainAE('./soc-sign-epinions.txt', 100)
autoencoder.train_batch(16)