def check_numerical_gradient():
    """Print the gradient returned by ``J`` beside its numerical estimate.

    A small slice of the training set is used (the first 64 entries along
    axis 0 and the first 10 along axis 1 of the images, plus the first 10
    labels) together with a ``[64, 20, 20, 10]`` layer layout.  The shape
    of the initial parameter vector is printed first, then one line per
    gradient entry: the value taken from ``J(...)[1]`` followed by the
    matching entry from ``compute_numerical_gradient``.
    """
    layer_sizes = [64, 20, 20, 10]
    lam = 3e-3

    sample_imgs = loader.load_train_imgs()[:64, :10]
    sample_labels = loader.load_train_labels()[:10]

    params = initialization(layer_sizes)
    print(params.shape)

    # J returns a sequence whose second item is the gradient.
    analytic = J(params, sample_imgs, sample_labels, layer_sizes, lam)[1]
    numeric = compute_numerical_gradient(
        J, params, sample_imgs, sample_labels, layer_sizes, lam)

    # '%s %s' reproduces the space-separated output of a two-item print.
    for idx, analytic_val in enumerate(analytic):
        print('%s %s' % (analytic_val, numeric[idx]))
def train():
    """Run ``pre_train`` and then ``fine_tuning`` on the training set.

    The layer layout and ``lbda`` are read from the module-level names
    ``input_size``, ``hidden_size_l1``, ``hidden_size_l2``, ``num_labels``
    and ``lbda``.

    Returns:
        The parameter vector returned by ``fine_tuning``.
    """
    layer_sizes = [input_size, hidden_size_l1, hidden_size_l2, num_labels]

    images = loader.load_train_imgs()
    labels = loader.load_train_labels()

    # pre_train yields one result per hidden layer plus a third matrix;
    # only the first element of each per-layer result is stacked.
    wb_first, wb_second, sr_matrix = pre_train(images, labels)
    start_theta = stack_params((wb_first[0], wb_second[0]), sr_matrix)

    return fine_tuning(start_theta, images, labels, layer_sizes, lbda)
def test(theta):
    """Predict on the t10k files with ``theta`` and print the error rate.

    Prints the first 20 true labels, the first 20 predictions, and then
    ``error rate:`` followed by the fraction of mismatching predictions.

    Args:
        theta: parameter vector handed to ``predict``.
    """
    layer_sizes = [input_size, hidden_size_l1, hidden_size_l2, num_labels]

    images = loader.load_train_imgs(u'../MNISTHelper/t10k-images.idx3-ubyte')
    labels = loader.load_train_labels(u'../MNISTHelper/t10k-labels.idx1-ubyte')

    predicted = predict(images, theta, layer_sizes)
    print(labels[:20])
    print(predicted[:20])

    mismatches = sum(
        1 for idx, guess in enumerate(predicted) if labels[idx] != guess)
    # Convert to float before dividing so the ratio is never truncated.
    print('error rate: %s' % (float(mismatches) / len(predicted),))
def main():
    """Minimise ``J`` with L-BFGS-B for a 784-196-784 layout and save W.

    The first 10000 entries along axis 1 of the training images are passed
    to ``J`` as both the data and the target argument.  The optimiser's
    three results are printed, followed by the shape of the first matrix
    from ``vec2mat``, which is then written to ``W`` with ``sio.savemat``.
    """
    layer_sizes = [28*28, 196, 28*28]
    rho = 0.1
    beta = 3.
    lam = 3e-3

    start_theta = initialization(layer_sizes)
    images = load_train_imgs()[:, :10000]

    # approx_grad=False: J itself is expected to supply the gradient.
    opt_theta, final_cost, opt_info = fmin_l_bfgs_b(
        J,
        start_theta,
        args=(images, images, layer_sizes, lam, rho, beta),
        approx_grad=False,
        maxiter=400,
    )
    for item in (opt_theta, final_cost, opt_info):
        print(item)

    first_w = vec2mat(opt_theta, layer_sizes)[0][0]
    print('W shape: %s' % (first_w.shape,))
    sio.savemat('W', {'W': first_w})
def main():
    """Fit parameters on the unlabeled digits, then train and score a classifier.

    Steps, in order:

    1. Load the training images and labels.  Examples whose label is below
       ``num_labels`` form the labeled set; the rest form the unlabeled set.
    2. Split the labeled set in half: the first half is used for training,
       the second half for testing.
    3. Minimise ``J`` with L-BFGS-B on the unlabeled data (passed as both
       data and target) for the layout
       ``[input_size, hidden_size, input_size]`` and save the first matrix
       from ``vec2mat`` to ``W`` with ``sio.savemat``.
    4. Map the train and test halves through ``feed_forward`` and train a
       classifier on the result with ``sr_train``.
    5. Print the first 20 test labels, the first 20 predictions and the
       fraction of misclassified test examples.

    ``input_size``, ``num_labels``, ``hidden_size``, ``ro``, ``lbda`` and
    ``beta`` are read from module level.
    """
    levels = [input_size, hidden_size, input_size]

    mnist_data = loader.load_train_imgs()
    mnist_labels = loader.load_train_labels()

    labeled_set = np.where(mnist_labels < num_labels)[0]
    unlabeled_set = np.where(mnist_labels > num_labels-1)[0]

    # Floor division keeps the split point an integer.  The previous
    # np.round(len(...)/2) becomes a float under true division, and a float
    # is not accepted as a slice bound.
    train_num = len(labeled_set) // 2

    train_idx = labeled_set[:train_num]
    test_idx = labeled_set[train_num:]

    train_data = mnist_data[:, train_idx]
    train_labels = mnist_labels[train_idx]
    test_data = mnist_data[:, test_idx]
    test_labels = mnist_labels[test_idx]
    unlabeled_data = mnist_data[:, unlabeled_set]

    # The '%s' form prints exactly what a two-item print statement would.
    print("train data: %s" % (train_data.shape[1],))
    print("test data: %s" % (test_data.shape[1],))
    print("unlabeled data: %s" % (unlabeled_data.shape[1],))

    init_theta = initialization(levels)
    # approx_grad=False: J itself is expected to supply the gradient.
    theta, cost, info = fmin_l_bfgs_b(
        J,
        init_theta,
        args=(unlabeled_data, unlabeled_data, levels, lbda, ro, beta),
        approx_grad=False,
        maxiter=400,
    )
    print("cost: %s" % (cost,))
    print("info: %s" % (info,))

    WB = vec2mat(theta, levels)
    sio.savemat('W', {'W': WB[0][0]})

    # Only the first entry of WB is used to transform the data.
    train_a2 = feed_forward(WB[0], train_data, hidden_size)
    test_a2 = feed_forward(WB[0], test_data, hidden_size)

    sr_init_theta = sr_train.initialize_theta(hidden_size, num_labels)
    sr_theta = sr_train.train(sr_init_theta, train_a2, train_labels, num_labels)
    sr_mat = sr_vec2mat(sr_theta, num_labels)

    pY = sr_predict(sr_mat, test_a2)
    print(test_labels[:20])
    print(pY[:20])

    miss = sum(1 for i, label in enumerate(test_labels) if label != pY[i])
    # Convert to float before dividing so the ratio is never truncated.
    print(float(miss) / len(test_labels))
# NOTE(review): the statements down to `return theta` are the tail of a
# function whose `def` line is outside this view; they are left untouched.
try:
    # Minimise J starting from init_theta with L-BFGS-B.  The four extra
    # values are handed to J packed as a single tuple argument, and
    # approx_grad=False means J is expected to supply the gradient itself.
    theta, cost, info = fmin_l_bfgs_b(J, init_theta, args=((X, Y, lbda, num_classes),), approx_grad=False, maxiter=400)
except Exception as e:
    # Any failure is only printed, not re-raised.
    # NOTE(review): unless theta/cost/info are bound earlier in the function
    # (not visible here), the prints below would then fail - confirm.
    print e
print theta
print cost
print info
return theta


if __name__ == '__main__':
    # Script entry point: build initial parameters, load the training files,
    # train, then predict on the t10k files and print the first 20 true and
    # predicted labels.
    inputSize = 28*28
    numClasses = 10
    lbda = 1e-4
    init_theta = initialize_theta(inputSize, numClasses)
    X = load_train_imgs(u'../MNISThelper/train-images.idx3-ubyte')
    Y = load_train_labels(u'../MNISThelper/train-labels.idx1-ubyte')
    print X.shape, Y.shape, init_theta.shape
    # numerical gradient check
    # (disabled: the code is wrapped in a bare string literal, so it is
    # never executed)
    ''' cX = X[:20, :10] cY = Y[:10] check_numerical_gradient(init_theta[:200], cX, cY, numClasses, lbda=lbda) '''
    theta = train(init_theta, X, Y, numClasses, lbda)
    # Convert the flat parameter vector into the form predict() takes.
    mat = vec2mat(theta, numClasses)
    tX = load_train_imgs('../MNISThelper/t10k-images.idx3-ubyte')
    tY = load_train_labels('../MNISThelper/t10k-labels.idx1-ubyte')
    pY = predict(mat, tX)
    print tY[:20]
    print pY[:20]