def train_softmax(pooled_train_features, train_num_images, train_labels):
    # Train a softmax regression classifier on the pooled convolutional features.
    softmax_lambda = 1e-4
    num_classes = 4
    # Reorder to (filters, rows, cols, images), then flatten each image into one column.
    softmax_x = np.transpose(pooled_train_features, (0, 2, 3, 1))
    softmax_x = softmax_x.reshape((np.size(pooled_train_features) // train_num_images,
                                   train_num_images))
    # Shift labels from 1..num_classes to 0..num_classes-1.
    softmax_y = train_labels.reshape(np.size(train_labels)) - 1
    init_theta = smrt.initialize_theta(softmax_x.shape[0], num_classes)
    theta = smrt.train(init_theta, softmax_x, softmax_y, num_classes, softmax_lambda)
    mat = smrt.vec2mat(theta, num_classes)
    return mat
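# Hedged usage sketch (added, not from the original source): how train_softmax might be
# called after the convolution/pooling stage. The file names and the assumed feature
# layout (num_filters, num_images, pooled_rows, pooled_cols) are illustrative
# assumptions, not part of this repository.
#
#   pooled = np.load("pooled_train_features.npy")   # hypothetical cached features
#   labels = np.load("train_labels.npy")            # labels assumed to be 1..4
#   softmax_weights = train_softmax(pooled, pooled.shape[1], labels)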
def pre_train(train_data, train_labels):
    global input_size, num_labels, hidden_size_l1, hidden_size_l2, ro, lbda, beta
    levels_l1 = [input_size, hidden_size_l1, input_size]

    print 'Step 1:', time.time()
    # Stack 1: train the first sparse autoencoder on the raw input, or reload a cached theta.
    init_theta = initialization(levels_l1)
    try:
        theta = np.genfromtxt("theta_l1.txt")
        print theta.shape
    except:
        theta, cost, info = fmin_l_bfgs_b(J, init_theta,
                                          args=(train_data, train_data, levels_l1, lbda, ro, beta),
                                          approx_grad=False, maxiter=400)
        np.savetxt("theta_l1.txt", theta)
        print "cost:", cost
        print "info:", info
    WB_l1 = vec2mat(theta, levels_l1)
    sio.savemat('W_l1', {'W_l1': WB_l1[0][0]})

    print 'Step 2:', time.time()
    # Stack 2: train the second autoencoder on the hidden activations of stack 1.
    levels_l2 = [hidden_size_l1, hidden_size_l2, hidden_size_l1]
    train_l1_a2 = feed_forward(WB_l1[0], train_data, hidden_size_l1)
    init_theta = initialization(levels_l2)
    try:
        theta = np.genfromtxt("theta_l2.txt")
        print theta.shape
    except:
        theta, cost, info = fmin_l_bfgs_b(J, init_theta,
                                          args=(train_l1_a2, train_l1_a2, levels_l2, lbda, ro, beta),
                                          approx_grad=False, maxiter=400)
        np.savetxt("theta_l2.txt", theta)
        print "cost:", cost
        print "info:", info
    WB_l2 = vec2mat(theta, levels_l2)
    sio.savemat('W_l2', {'W_l2': WB_l2[0][0]})

    print 'Step 3:', time.time()
    # Softmax classifier trained on the second hidden layer, or reloaded from cache.
    try:
        sr_theta = np.genfromtxt("sr_theta.txt")
        print sr_theta.shape
    except:
        train_l2_a2 = feed_forward(WB_l2[0], train_l1_a2, hidden_size_l2)
        sr_init_theta = sr_train.initialize_theta(hidden_size_l2, num_labels)
        sr_theta = sr_train.train(sr_init_theta, train_l2_a2, train_labels, num_labels)
        np.savetxt("sr_theta.txt", sr_theta)
    sr_mat = sr_vec2mat(sr_theta, num_labels)
    return WB_l1, WB_l2, sr_mat
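# Hedged sketch (added, not in the original): a helper showing how the stacks returned
# by pre_train could be chained for prediction, reusing this module's feed_forward and
# sr_predict. The helper name predict_stacked and its argument order are assumptions.
def predict_stacked(WB_l1, WB_l2, sr_mat, data):
    a2_l1 = feed_forward(WB_l1[0], data, hidden_size_l1)    # stack-1 hidden activations
    a2_l2 = feed_forward(WB_l2[0], a2_l1, hidden_size_l2)   # stack-2 hidden activations
    return sr_predict(sr_mat, a2_l2)                        # softmax class predictions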
def main():
    global input_size, num_labels, hidden_size, ro, lbda, beta
    levels = [input_size, hidden_size, input_size]

    # Split MNIST into a labeled set (digits below num_labels) and an unlabeled set.
    mnist_data = loader.load_train_imgs()
    mnist_labels = loader.load_train_labels()
    labeled_set = np.where(mnist_labels < num_labels)[0]
    unlabeled_set = np.where(mnist_labels > num_labels - 1)[0]
    train_num = int(np.round(len(labeled_set) / 2.))  # cast to int so it can be used as a slice index
    train_data = mnist_data[:, labeled_set[:train_num]]
    train_labels = mnist_labels[labeled_set[:train_num]]
    test_data = mnist_data[:, labeled_set[train_num:]]
    test_labels = mnist_labels[labeled_set[train_num:]]
    unlabeled_data = mnist_data[:, unlabeled_set]
    print "train data:", train_data.shape[1]
    print "test data:", test_data.shape[1]
    print "unlabeled data:", unlabeled_data.shape[1]

    # Train the sparse autoencoder on the unlabeled data.
    init_theta = initialization(levels)
    theta, cost, info = fmin_l_bfgs_b(J, init_theta,
                                      args=(unlabeled_data, unlabeled_data, levels, lbda, ro, beta),
                                      approx_grad=False, maxiter=400)
    print "cost:", cost
    print "info:", info
    WB = vec2mat(theta, levels)
    sio.savemat('W', {'W': WB[0][0]})

    # Train a softmax classifier on the learned features and evaluate it.
    train_a2 = feed_forward(WB[0], train_data, hidden_size)
    test_a2 = feed_forward(WB[0], test_data, hidden_size)
    sr_init_theta = sr_train.initialize_theta(hidden_size, num_labels)
    sr_theta = sr_train.train(sr_init_theta, train_a2, train_labels, num_labels)
    sr_mat = sr_vec2mat(sr_theta, num_labels)
    pY = sr_predict(sr_mat, test_a2)
    print test_labels[:20]
    print pY[:20]

    # Misclassification rate on the held-out labeled half.
    miss = 0.
    for i, l in enumerate(test_labels):
        if l != pY[i]:
            miss += 1.
    print miss / len(test_labels)
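# Hedged sketch (added): a possible entry point defining the module-level globals that
# main() and pre_train() rely on. The concrete values below are illustrative
# assumptions (typical self-taught-learning settings for MNIST), not taken from the
# original source.
if __name__ == "__main__":
    input_size = 28 * 28     # assumed: 28x28 MNIST images
    num_labels = 5           # assumed: digits 0-4 labeled, 5-9 used as unlabeled data
    hidden_size = 196        # assumed autoencoder hidden-layer size
    hidden_size_l1 = 196
    hidden_size_l2 = 196
    ro = 0.1                 # assumed sparsity target
    lbda = 3e-3              # assumed weight decay
    beta = 3.                # assumed sparsity penalty weight
    main()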