def finetune(self, train_set, test_set, n_epochs, learning_rate, batch_size):
    train_set_x, train_set_y, train_set_c = train_set
    test_set_x, test_set_y, test_set_c = test_set

    # Z_nk: +1 at the true class of each example, -1 elsewhere
    # (the sign matrix used by the one-sided regression loss)
    train_set_z = np.zeros(train_set_c.shape) - 1
    for i in xrange(train_set_z.shape[0]):
        train_set_z[i][train_set_y[i]] = 1

    train_set_x = make_shared_data(train_set_x)
    train_set_c = make_shared_data(train_set_c)
    train_set_z = make_shared_data(train_set_z)
    train_set_y = T.cast(make_shared_data(train_set_y), 'int32')

    test_set_x = make_shared_data(test_set_x)
    test_set_c = make_shared_data(test_set_c)
    test_set_y = T.cast(make_shared_data(test_set_y), 'int32')

    index = T.lscalar()  # symbolic variable for the index of a mini-batch

    cost = self.logLayer.one_sided_regression_loss
    gparams = T.grad(cost, self.params)

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=[(param, param - learning_rate * gparam)
                 for param, gparam in zip(self.params, gparams)],
        givens={
            self.input: train_set_x[index * batch_size:(index + 1) * batch_size],
            self.logLayer.cost_vector: train_set_c[index * batch_size:(index + 1) * batch_size],
            self.logLayer.Z_nk: train_set_z[index * batch_size:(index + 1) * batch_size]
        },
        name='train_model')

    in_sample_result = theano.function(
        inputs=[],
        outputs=[self.logLayer.error, self.logLayer.future_cost],
        givens={
            self.input: train_set_x,
            self.logLayer.y: train_set_y,
            self.logLayer.cost_vector: train_set_c
        },
        name='in_sample_result')

    out_sample_result = theano.function(
        inputs=[],
        outputs=[self.logLayer.error, self.logLayer.future_cost],
        givens={
            self.input: test_set_x,
            self.logLayer.y: test_set_y,
            self.logLayer.cost_vector: test_set_c
        },
        name='out_sample_result')

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    best_Cout = np.inf
    corresponding_epoch = None
    corresponding_Eout = None
    for epoch in xrange(n_epochs):
        current_batch_cost = 0.
        for batch_index in xrange(n_train_batches):
            current_batch_cost += train_model(batch_index)
        print ' epoch #%d, loss = %f' % (epoch + 1, current_batch_cost / n_train_batches)

        # TODO: for acceleration (evaluating on the full sets every epoch is expensive)
        Ein, Cin = in_sample_result()
        Eout, Cout = out_sample_result()
        if Cout < best_Cout:
            best_Cout = Cout
            corresponding_Eout = Eout
            corresponding_epoch = epoch + 1
            print ' better performance achieved ... best_Cout = %f' % best_Cout

    print 'after training %d epochs, best_Cout = %f, occurred in epoch #%d, and corresponding_Eout = %f' \
        % (n_epochs, best_Cout, corresponding_epoch, corresponding_Eout)
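# --- Usage note (illustrative, not part of the repository) ----------------
# The +-1 indicator matrix `train_set_z` that the loop above fills row by row
# can also be built in one vectorized step. The toy shapes below are made up;
# only the construction rule (+1 at the true class, -1 elsewhere) comes from
# the code above.
import numpy as np

toy_y = np.array([0, 2, 1, 2, 0])            # 5 examples, 3 classes
toy_c = np.random.rand(5, 3)                 # per-example cost vectors

toy_z = -np.ones(toy_c.shape)                # -1 everywhere ...
toy_z[np.arange(toy_y.shape[0]), toy_y] = 1  # ... +1 at the true class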
def learning_feature(self, train_set, n_epochs, learning_rate, batch_size,
                     corruption_level, balance_coef):
    # perform `denoising`
    tilde_x = self.get_corrupted_input(self.x, corruption_level)
    # map the corrupted input to the hidden layer
    y = T.nnet.sigmoid(T.dot(tilde_x, self.W) + self.b)

    # map the hidden representation back to the unsupervised reconstruction
    z1 = T.nnet.sigmoid(T.dot(y, self.Wu) + self.bu)
    L1 = T.mean(-T.sum(self.x * T.log(z1) + (1 - self.x) * T.log(1 - z1), axis=1))

    # perform one-sided regression to fit the cost
    z2 = T.dot(y, self.Ws) + self.bs
    cost_vector = T.matrix('cost_vector')
    Z_nk = T.matrix('Z_nk')
    # one-sided hinge form:
    # xi = T.maximum((Z_nk * (z2 - cost_vector)), 0.)  # xi is a matrix
    # L2 = T.sum(xi)
    # smooth logistic loss (upper bound of the hinge above)
    delta = T.log(1 + T.exp(Z_nk * (z2 - cost_vector)))
    L2 = T.sum(delta)

    # symbolic variable for balance_coef
    bc = T.scalar('bc')
    cost = L1 + bc * L2
    gparams = T.grad(cost, self.params)
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(self.params, gparams)
    ]

    batch_index = T.lscalar('batch_index')

    train_set_x, train_set_y, train_set_c = train_set
    # Z_nk: +1 at the true class of each example, -1 elsewhere
    train_set_z = np.zeros(train_set_c.shape) - 1
    for i in xrange(train_set_z.shape[0]):
        train_set_z[i][train_set_y[i]] = 1

    train_set_x = make_shared_data(train_set_x)
    train_set_c = make_shared_data(train_set_c)
    train_set_z = make_shared_data(train_set_z)

    pretrain_model = theano.function(
        inputs=[batch_index, bc],
        outputs=[cost, L1, L2],
        updates=updates,
        givens={
            self.x: train_set_x[batch_index * batch_size:
                                (batch_index + 1) * batch_size],
            cost_vector: train_set_c[batch_index * batch_size:
                                     (batch_index + 1) * batch_size],
            Z_nk: train_set_z[batch_index * batch_size:
                              (batch_index + 1) * batch_size]
        },
        name='pretrain_model'
    )

    n_batches = train_set_x.get_value().shape[0] / batch_size
    for epoch in xrange(n_epochs):
        epoch_cost = 0.
        L1_cost = 0.
        L2_cost = 0.
        for batch in xrange(n_batches):
            batch_cost = pretrain_model(batch, balance_coef)
            epoch_cost += batch_cost[0]
            L1_cost += batch_cost[1]
            L2_cost += batch_cost[2]
        epoch_cost /= n_batches
        L1_cost /= n_batches
        L2_cost /= n_batches
        print ' epoch #%d, loss = (%f, %f, %f)' % (epoch + 1, epoch_cost, L1_cost, L2_cost)

    # transform the (uncorrupted) input through the learned hidden layer
    y_new = T.nnet.sigmoid(T.dot(self.x, self.W) + self.b)
    transform_data = theano.function(
        inputs=[],
        outputs=y_new,
        givens={
            self.x: train_set_x
        },
        name='transform_data'
    )

    return [transform_data(), train_set_y, train_set_c.get_value()]
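# --- Illustration (standalone, not part of the repository) ----------------
# The one-sided regression term used in `learning_feature`, evaluated in plain
# NumPy: the hinge form xi = max(0, Z * (z2 - c)) (left commented out above)
# and the smooth logistic upper bound log(1 + exp(Z * (z2 - c))) that L2
# actually sums. All arrays are toy data; only the formulas come from the code.
import numpy as np

rng = np.random.RandomState(0)
z2 = rng.randn(4, 3)                          # estimated costs (4 examples, 3 classes)
c = rng.rand(4, 3)                            # true per-example cost vectors
Z = -np.ones_like(c)                          # +1 at the true class, -1 elsewhere
Z[np.arange(4), [0, 2, 1, 1]] = 1

xi = np.maximum(Z * (z2 - c), 0.)             # one-sided hinge penalty
delta = np.log(1 + np.exp(Z * (z2 - c)))      # smooth logistic upper bound

# log(1 + exp(t)) >= max(0, t) for every t, so the smooth surrogate always
# dominates the hinge; minimizing it also drives the one-sided violations down.
assert np.all(delta >= xi)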
def sgd_optimize(self, train_set, test_set, n_epochs, learning_rate, batch_size):
    train_set_x, train_set_y, train_set_c = train_set
    test_set_x, test_set_y, test_set_c = test_set

    # Z_nk: +1 at the true class of each example, -1 elsewhere
    train_set_z = np.zeros(train_set_c.shape) - 1
    for i in xrange(train_set_z.shape[0]):
        train_set_z[i][train_set_y[i]] = 1

    from toolbox import make_shared_data
    train_set_x = make_shared_data(train_set_x)
    train_set_c = make_shared_data(train_set_c)
    train_set_z = make_shared_data(train_set_z)
    train_set_y = T.cast(make_shared_data(train_set_y), 'int32')

    test_set_x = make_shared_data(test_set_x)
    test_set_c = make_shared_data(test_set_c)
    test_set_y = T.cast(make_shared_data(test_set_y), 'int32')

    print '... building the model'
    index = T.lscalar()
    cost = self.logRegressionLayer.one_sided_regression_loss
    gparams = [T.grad(cost, param) for param in self.params]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=[
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ],
        givens={
            self.input: train_set_x[index * batch_size: (index + 1) * batch_size],
            self.logRegressionLayer.cost_vector:
                train_set_c[index * batch_size: (index + 1) * batch_size],
            self.logRegressionLayer.Z_nk:
                train_set_z[index * batch_size: (index + 1) * batch_size]
        },
        name='train_model'
    )

    in_sample_result = theano.function(
        inputs=[],
        outputs=[
            self.logRegressionLayer.error,
            self.logRegressionLayer.future_cost
        ],
        givens={
            self.input: train_set_x,
            self.logRegressionLayer.y: train_set_y,
            self.logRegressionLayer.cost_vector: train_set_c
        },
        name='in_sample_result'
    )

    out_sample_result = theano.function(
        inputs=[],
        outputs=[
            self.logRegressionLayer.error,
            self.logRegressionLayer.future_cost
        ],
        givens={
            self.input: test_set_x,
            self.logRegressionLayer.y: test_set_y,
            self.logRegressionLayer.cost_vector: test_set_c
        },
        name='out_sample_result'
    )

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    print '... training the model'
    best_Cout = np.inf
    corresponding_Eout = np.inf
    for epoch in xrange(n_epochs):
        print 'epoch #%d' % (epoch + 1)
        for batch_index in xrange(n_train_batches):
            batch_cost = train_model(batch_index)
            Ein, Cin = in_sample_result()
            Eout, Cout = out_sample_result()
            if Cout < best_Cout:
                best_Cout = Cout
                corresponding_Eout = Eout
                print ' better performance achieved ... best_Cout = %f' % best_Cout

    print 'after training %d epochs, best_Cout = %f, and corresponding_Eout = %f' \
        % (n_epochs, best_Cout, corresponding_Eout)
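# --- Assumption (hypothetical sketch, not the repository's definition) ----
# `one_sided_regression_loss` lives inside the logistic-regression layer and
# is not shown in this section. Based on the hinge expression left commented
# out in `learning_feature`, it plausibly looks like the following:
import theano.tensor as T

output = T.matrix('output')              # estimated cost per class, n x K
cost_vector = T.matrix('cost_vector')    # true per-example costs, n x K
Z_nk = T.matrix('Z_nk')                  # +1 at the true class, -1 elsewhere

# penalize an estimate only when it falls on the wrong side of the true cost
xi = T.maximum(Z_nk * (output - cost_vector), 0.)
one_sided_regression_loss = T.sum(xi)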
def sgd_optimize(self, train_set, test_set, n_epochs, learning_rate, batch_size):
    """ Optimizing model parameters by stochastic gradient descent """
    train_set_x, train_set_y, train_set_c = train_set
    assert train_set_x.shape == (60000, 784)
    assert train_set_y.shape == (60000,)
    assert train_set_c.shape == (60000, 10)

    test_set_x, test_set_y, test_set_c = test_set
    assert test_set_x.shape == (10000, 784)
    assert test_set_y.shape == (10000,)
    assert test_set_c.shape == (10000, 10)

    from toolbox import make_shared_data
    train_set_x = make_shared_data(train_set_x)
    train_set_c = make_shared_data(train_set_c)
    train_set_y = T.cast(make_shared_data(train_set_y), 'int32')

    test_set_x = make_shared_data(test_set_x)
    test_set_c = make_shared_data(test_set_c)
    test_set_y = T.cast(make_shared_data(test_set_y), 'int32')

    print '... building the model'
    index = T.lscalar()
    cost = self.MSE
    gparams = [T.grad(cost, param) for param in self.params]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=[
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ],
        givens={
            self.input: train_set_x[index * batch_size: (index + 1) * batch_size],
            self.cost_vector: train_set_c[index * batch_size: (index + 1) * batch_size]
        },
        name='train_model'
    )

    in_sample_result = theano.function(
        inputs=[],
        outputs=[self.error, self.future_cost],
        givens={
            self.input: train_set_x,
            self.y: train_set_y,
            self.cost_vector: train_set_c
        },
        name='in_sample_result'
    )

    out_sample_result = theano.function(
        inputs=[],
        outputs=[self.error, self.future_cost],
        givens={
            self.input: test_set_x,
            self.y: test_set_y,
            self.cost_vector: test_set_c
        },
        name='out_sample_result'
    )

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    print '... training the model'
    best_Cout = np.inf
    corresponding_Eout = np.inf
    for epoch in xrange(n_epochs):
        print 'epoch #%d' % (epoch + 1)
        for batch_index in xrange(n_train_batches):
            batch_cost = train_model(batch_index)
            Ein, Cin = in_sample_result()
            Eout, Cout = out_sample_result()
            if Cout < best_Cout:
                best_Cout = Cout
                corresponding_Eout = Eout
                print ' better performance achieved ... best_Cout = %f' % best_Cout

    print 'after training %d epochs, best_Cout = %f, and corresponding_Eout = %f' \
        % (n_epochs, best_Cout, corresponding_Eout)
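# --- Assumption (hypothetical sketch, not the repository's definition) ----
# `error` and `future_cost` are also defined outside this section. A common
# cost-sensitive reading, assumed here, is: predict the class with the
# smallest estimated cost, report the misclassification rate as `error`, and
# report the average cost actually incurred by those predictions as
# `future_cost`.
import numpy as np

def evaluate(pred_costs, y, c):
    """pred_costs: n x K estimated costs, y: true labels (n,), c: n x K true costs."""
    y_pred = np.argmin(pred_costs, axis=1)                 # cheapest predicted class
    error = np.mean(y_pred != y)                           # 0/1 misclassification rate
    future_cost = np.mean(c[np.arange(len(y)), y_pred])    # realized average cost
    return error, future_cost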