from functools import partial

import scipy.optimize

import sample_images
import sparse_autoencoder

# Network Architecture
patch_size = (28, 28)
visible_size = patch_size[0] * patch_size[1]
hidden_size = 196
#hidden_size = 10

# Training params
weight_decay, sparsity_param, beta = 3e-3, 0.1, 3
max_iter = 400  # Maximum number of iterations of L-BFGS to run

# Get the data (digits 5-9 from the MNIST training set)
#num_samples = 100  # small run, useful for debugging
num_samples = 100000
patches, _ = sample_images.get_mnist_data('../data/mnist.pkl.gz',
                                          lambda l: l >= 5,
                                          train=True,
                                          num_samples=num_samples)

# Set up L-BFGS args
theta = sparse_autoencoder.initialize_params(hidden_size, visible_size)
sae_cost = partial(sparse_autoencoder.cost,
                   visible_size=visible_size,
                   hidden_size=hidden_size,
                   weight_decay=weight_decay,
                   beta=beta,
                   sparsity_param=sparsity_param,
                   data=patches)

# Train! fmin_l_bfgs_b returns (optimized params, final cost, info dict).
trained, cost, d = scipy.optimize.fmin_l_bfgs_b(sae_cost, theta, maxfun=max_iter)
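
# The sketch below is not part of the original script; it shows one way
# the flat parameter vector returned by L-BFGS might be unpacked to
# compute hidden-layer activations (the learned features). It assumes
# the conventional [W1, W2, b1, b2] flat packing used in UFLDL-style
# exercises -- the actual layout is whatever
# sparse_autoencoder.initialize_params uses, so adjust accordingly.
import numpy as np

def encode(theta, hidden_size, visible_size, data):
    """Hidden activations sigmoid(W1 x + b1), under the assumed packing."""
    n_w1 = hidden_size * visible_size
    W1 = theta[:n_w1].reshape(hidden_size, visible_size)
    b1 = theta[2 * n_w1:2 * n_w1 + hidden_size].reshape(hidden_size, 1)
    return 1.0 / (1.0 + np.exp(-(np.dot(W1, data) + b1)))

# e.g. features = encode(trained, hidden_size, visible_size, patches)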
import numpy as np

import sample_images


def softmax_predict(softmax_model, data):
    """Accepts model and data.

    The model has shape (num_labels, input_size) and the data has shape
    (input_size, num_examples). Returns an array of predicted labels in
    0..num_labels - 1.
    """
    # No need to exponentiate or normalize: the softmax is monotonic in
    # the logits, so the argmax of the raw scores gives the same label.
    return np.argmax(np.dot(softmax_model, data), axis=0)


if __name__ == '__main__':
    num_examples = 60000
    data, labels = sample_images.get_mnist_data('../data/mnist.pkl.gz',
                                                train=True,
                                                num_examples=num_examples)
    print(len(labels))

    input_size = 28 * 28  # Size of input vector (MNIST images are 28x28)
    num_classes = 10      # Number of classes (MNIST images fall into 10 classes)
    weight_decay = 1e-4   # Weight decay parameter
    max_iter = 400

    theta = 0.005 * np.random.randn(num_classes * input_size)
    # softmax_train is assumed to be defined earlier in this module.
    trained = softmax_train(input_size, num_classes, weight_decay, data,
                            labels, max_iter)
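
# Usage sketch (not part of the original script): a small helper for
# checking softmax_predict against ground-truth labels. It only assumes
# the model/data shapes documented in softmax_predict's docstring.
def prediction_accuracy(softmax_model, data, labels):
    """Fraction of examples whose predicted label matches `labels`."""
    return np.mean(softmax_predict(softmax_model, data) == labels)

# Example, assuming softmax_train returns the model already shaped as
# (num_classes, input_size) (reshape a flat parameter vector first if not):
#   print('train accuracy:', prediction_accuracy(trained, data, labels))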