def main_train():
    trX, teX, trY, teY = mnist(onehot=True)

    X = T.fmatrix()
    Y = T.fmatrix()

    w_h = init_weights((784, 625))
    w_h2 = init_weights((625, 625))
    w_o = init_weights((625, 10))
    params = [w_h, w_h2, w_o]

    noise_h, noise_h2, noise_py_x = model(X, params, 0.2, 0.5)
    h, h2, py_x = model(X, params, 0., 0.)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    updates = RMSprop(cost, params, lr=0.001)

    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

    for i in range(100):
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            cost = train(trX[start:end], trY[start:end])
        print np.mean(np.argmax(teY, axis=1) == predict(teX))
        if i % 10 == 0:
            name = 'media/model/modnet-{0}.model'.format(str(i))
            save_model(name, params)
    name = 'media/model/modnet-final.model'
    save_model(name, params)
def main_train():
    trX, teX, trY, teY = mnist(onehot=True)

    X = T.fmatrix()
    Y = T.fmatrix()

    w_h = init_weights((784, 625))
    w_o = init_weights((625, 10))
    params = [w_h, w_o]

    py_x = model(X, params)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    updates = sgd(cost, params)

    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

    for i in range(100):
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            cost = train(trX[start:end], trY[start:end])
        print np.mean(np.argmax(teY, axis=1) == predict(teX))
        if i % 10 == 0:
            name = 'media/model/net-{0}.model'.format(str(i))
            save_model(name, params)
    name = 'media/model/net-final.model'
    save_model(name, params)
def run_regression(trials, batch_size):
    train_x, test_x, train_y, test_y = mnist(onehot=True)
    x_dim = len(train_x[0])
    y_dim = len(train_y[0])

    weight_vec = theano.shared(np.random.randn(x_dim, y_dim) * INITIAL_WEIGHT_MAX)
    offset = theano.shared(np.zeros(y_dim))

    input_data = T.fmatrix('input_data')
    label = T.fmatrix('label')

    softmax_output = T.nnet.softmax(T.dot(input_data, weight_vec) + offset)
    cost = T.mean(T.nnet.categorical_crossentropy(softmax_output, label))

    weight_grad, offset_grad = T.grad(cost=cost, wrt=[weight_vec, offset])
    updates = [[weight_vec, weight_vec - weight_grad * LEARNING_RATE],
               [offset, offset - offset_grad * LEARNING_RATE]]

    train_f = theano.function(inputs=[input_data, label], outputs=cost, updates=updates,
                              allow_input_downcast=True)

    predicted_label = T.argmax(softmax_output, axis=1)
    output_f = theano.function(inputs=[input_data], outputs=predicted_label,
                               allow_input_downcast=True)

    for i in range(trials):
        for start, end in zip(range(0, len(train_x), batch_size),
                              range(batch_size, len(train_x), batch_size)):
            cost = train_f(train_x[start:end], train_y[start:end])
        print i, np.mean(np.argmax(test_y, axis=1) == output_f(test_x))
def main_train():
    trX, teX, trY, teY = mnist(onehot=True)
    trX = trX.reshape(-1, 1, 28, 28)
    teX = teX.reshape(-1, 1, 28, 28)

    X = T.ftensor4()
    Y = T.fmatrix()

    w = init_weights((32, 1, 3, 3))
    w2 = init_weights((64, 32, 3, 3))
    w3 = init_weights((128, 64, 3, 3))
    w4 = init_weights((128 * 3 * 3, 625))
    w_o = init_weights((625, 10))
    params = [w, w2, w3, w4, w_o]

    noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, params, 0.2, 0.5)
    l1, l2, l3, l4, py_x = model(X, params, 0., 0.)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    updates = RMSprop(cost, params, lr=0.001)

    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

    for i in range(100):
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            cost = train(trX[start:end], trY[start:end])
        print np.mean(np.argmax(teY, axis=1) == predict(teX))
        if i % 10 == 0:
            name = 'media/model/conv-{0}.model'.format(str(i))
            save_model(name, params)
    name = 'media/model/conv-final.model'
    save_model(name, params)
def main_loop():
    trX, teX, trY, teY = mnist(ntrain=ntrain, ntest=2000, onehot=True)
    print "before", trX[30][0:10]
    seq = mnist_with_noise([trX, trY], 10)
    print "after", trX[30][0:10]

    X = T.fmatrix()
    Y = T.fmatrix()
    #grads = T.fvector()

    w_h = [init_weights((784, 625)), init_weights((625, 10))]

    py_x = model(X, w_h)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    params = w_h
    updates = sgd(cost, params)
    grads = T.grad(cost=cost, wrt=params)
    grad_for_norm = T.grad(cost=cost, wrt=params)

    train = theano.function(inputs=[X, Y], outputs=[cost, grads[0], grads[1]], updates=updates,
                            allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
    get_grad = theano.function(inputs=[X, Y], outputs=[cost, grad_for_norm[0], grad_for_norm[1]],
                               allow_input_downcast=True)

    mb_size = 128
    for i in range(2):
        grad_list = []
        for start, end in zip(range(0, len(trX), mb_size), range(mb_size, len(trX), mb_size)):
            cost, grads0, grads1 = train(trX[start:end], trY[start:end])
        print np.mean(np.argmax(teY, axis=1) == predict(teX))

    noisy_grads = []
    normal_grads = []
    noisy_cost = []
    normal_cost = []
    mb_size = 1
    n_predicts = 0
    for i in seq:
        cost, grad0, grad1 = get_grad(trX[i:i+1], trY[i:i+1])
        norm = np.linalg.norm(grad0)
        if i < 0.1 * ntrain:
            n_predicts += (np.argmax(trY[i:i+1], axis=1) == predict(trX[i:i+1]))
            noisy_grads.append(norm)
            noisy_cost.append(cost)
        else:
            normal_grads.append(norm)
            normal_cost.append(cost)

    print "noisy grad : mean,var - ", np.mean(noisy_grads), np.var(noisy_grads)
    print "normal grad: mean,var - ", np.mean(normal_grads), np.var(normal_grads)
    print "noisy cost : mean,var - ", np.mean(noisy_cost), np.var(noisy_cost)
    print "normal cost: mean,var - ", np.mean(normal_cost), np.var(normal_cost)
    print " noisy predicts out of 5000 -", n_predicts

    plt.plot(noisy_grads)
    plt.plot(normal_grads)
    plt.savefig('grad0.jpeg')
def run_net(trials, batch_size):
    train_x, test_x, train_y, test_y = mnist(onehot=True)
    input_dim = len(train_x[0])
    output_dim = len(test_y[0])

    [w_h1, w_h2, weight_outputs] = build_weights([(input_dim, 625), (625, 625), (625, output_dim)])

    X = T.fmatrix()  # symbolic variable for the input matrix
    Y = T.fmatrix()  # symbolic variable for the output

    # outputs from the layers with dropout
    [dropout_h1, dropout_h2, dropout_net_output] = forward_prop([X, w_h1, w_h2, weight_outputs],
                                                                [0.2, 0.5, 0.5, 0.5])
    # outputs from the layers without dropout
    [h1, h2, net_output] = forward_prop([X, w_h1, w_h2, weight_outputs], [0., 0., 0., 0.])

    # actual prediction
    predicted_label = T.argmax(net_output, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(dropout_net_output, Y))
    updates = RMSprop(cost, [w_h1, w_h2, weight_outputs])

    net_train = function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    get_net_output = function(inputs=[X], outputs=predicted_label, allow_input_downcast=True)

    for trial in range(trials):
        for batch_start in range(0, len(train_x) - batch_size, batch_size):
            batch_end = batch_start + batch_size
            net_train(train_x[batch_start:batch_end], train_y[batch_start:batch_end])
        print np.mean(np.argmax(test_y, axis=1) == get_net_output(test_x))
def main(self):
    # Load training and test data
    training_x, test_x, training_y, test_y = load.mnist(onehot=True)

    # Symbolic variables
    x = tensor.fmatrix()
    y = tensor.fmatrix()

    # Initialize weights
    temp_weight = INPUT_SIZE
    weights = []
    for layer in self.layer_sizes:
        weight = self.init_weights((temp_weight, layer))
        weights.append(weight)
        temp_weight = layer
    weight = self.init_weights((temp_weight, OUTPUT_SIZE))
    weights.append(weight)

    # Initialize model
    model_layer_noise = self.model2(x, weights, 0.2, 0.5)
    model_layer_values = self.model2(x, weights, 0., 0.)
    y_x = tensor.argmax(model_layer_values[-1], axis=1)

    # Initialize the update function
    cost = tensor.mean(tensor.nnet.categorical_crossentropy(model_layer_noise[-1], y))
    params = weights
    updates = self.RMSprop(cost, params)  # SGD / RMSprop

    # Initialize core functionality
    train = theano.function(inputs=[x, y], outputs=cost, updates=updates,
                            allow_input_downcast=True)
    self.predict = theano.function(inputs=[x], outputs=y_x, allow_input_downcast=True)

    # Training on mnist
    print("\nTRAINING...")
    for i in range(NUMBER_OF_RUNS):
        for start, end in zip(range(0, len(training_x), BATCH_SIZE),
                              range(BATCH_SIZE, len(training_x), BATCH_SIZE)):
            cost = train(training_x[start:end], training_y[start:end])
        print("Iteration ", i + 1, "/", NUMBER_OF_RUNS, "(",
              numpy.mean(numpy.argmax(test_y, axis=1) == self.predict(test_x)) * 100, ")")

    # Testing
    print("\nTESTING ON: MNIST TRAINING SET...")
    print(numpy.mean(numpy.argmax(training_y, axis=1) == self.predict(training_x)) * 100,
          "percent correct")
    print("\nTESTING ON: MNIST TEST SET...")
    print(numpy.mean(numpy.argmax(test_y, axis=1) == self.predict(test_x)) * 100,
          "percent correct")
def simple_nn():
    # mnist dataset, training + test
    trX, teX, trY, teY = mnist(onehot=True)

    X = T.fmatrix()
    Y = T.fmatrix()

    # init the weights
    w_h = init_weights((784, 625))
    w_h2 = init_weights((625, 625))
    w_o = init_weights((625, 10))

    noise_h, noise_h2, noise_py_x = model(X, w_h, w_h2, w_o, 0.8, 0.5)
    h, h2, py_x = model(X, w_h, w_h2, w_o, 1., 1.)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    params = [w_h, w_h2, w_o]
    updates = RMSprop(cost, params, lr=0.001)

    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates,
                            allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

    f_out = open('res/res_dropout_nn', 'w')

    for i in range(100):  # you can adjust this if training takes too long
        # train batches of 128 instances
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            cost = train(trX[start:end], trY[start:end])
        print i
        correct = np.mean(np.argmax(teY, axis=1) == predict(teX))
        res = str(i) + " " + str(correct) + "\n"
        f_out.write(res)

    print 'Result dropout:', correct
def question2data():
    trX, teX, trY, teY = mnist(ntrain=1000, ntest=250, onehot=False)
    # keep threes and fours only
    trX, trY = keep_threes_and_fours(trX, trY)
    teX, teY = keep_threes_and_fours(teX, teY)
    # converts 3 to 0, and 4 to 1 (two classes)
    trY = convert_three_and_four_to_zero_and_one(trY)
    teY = convert_three_and_four_to_zero_and_one(teY)
    # FIXME: we do not one-hot encode; a single output suffices for binary classification
    trX, trY = permute(trX, trY)
    teX, teY = permute(teX, teY)
    return trX, trY, teX, teY
def mnist_example(epochs=10, verbose=False, save=False):
    print "Initializing network"
    mnet = ModernNeuralNetwork([784, 625, 860, 10])
    trX, teX, trY, teY = mnist(onehot=True)
    print "Creating Model"
    mnet.create_model_functions()
    print "Training Network"
    for i in range(epochs):
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            cost = mnet.train(trX[start:end], trY[start:end])
        if verbose:
            print np.mean(np.argmax(teY, axis=1) == mnet.predict(teX))
    if save:
        mnet.save_weights("MNIST_Weights.save")
        print("Saved weights to \"MNIST_Weights.save\".")
def initialize(self):
    self.trX, self.teX, self.trY, self.teY = mnist(1000, 500, onehot=True)
    self.trX = self.trX.reshape(-1, 1, 28, 28)
    self.teX = self.teX.reshape(-1, 1, 28, 28)

    self.X = T.ftensor4()
    self.Y = T.fmatrix()

    self.w = self.init_weights((32, 1, 3, 3))
    self.w2 = self.init_weights((64, 32, 3, 3))
    self.w3 = self.init_weights((128, 64, 3, 3))
    self.w4 = self.init_weights((128 * 3 * 3, 625))
    self.w_o = self.init_weights((625, 10))

    self.w = np.array(self.w)
    print type(self.w)
def export_wrong_samples(input_file, output_folder):
    trX, vlX, teX, trY, vlY, teY = mnist(onehot=False, ndim=2)

    with open(input_file, 'r') as f:
        train_ws, train_c, valid_ws, valid_c, test_ws, test_c = pickle.load(f)

    trX = np.asarray(trX[train_ws], dtype=np.uint8).reshape((-1, 28, 28))
    trY = trY[train_ws]
    vlX = np.asarray(vlX[valid_ws], dtype=np.uint8).reshape((-1, 28, 28))
    vlY = vlY[valid_ws]
    teX = np.asarray(teX[test_ws], dtype=np.uint8).reshape((-1, 28, 28))
    teY = teY[test_ws]

    export_image_array(trX, os.path.join(output_folder, 'train'), "")
    export_classification_info(trY, train_c, os.path.join(output_folder, 'train'), "")
    export_image_array(vlX, os.path.join(output_folder, 'valid'), "")
    export_classification_info(vlY, valid_c, os.path.join(output_folder, 'valid'), "")
    export_image_array(teX, os.path.join(output_folder, 'test'), "")
    export_classification_info(teY, test_c, os.path.join(output_folder, 'test'), "")
def main():
    # Dataset in 4D-array format (number of samples, number of feature maps, image rows, image columns)
    trX, teX, trY, teY = mnist(onehot=True)

    h1, hpiece1, h2, hpiece2 = 625, 5, 625, 5
    params = basicUtils.randomSearch(nIter=10)
    cvErrorList = []
    for param, num in zip(params, range(len(params))):
        lr, C = param
        print '*' * 40, num, 'parameters', param, '*' * 40
        maxout = CMaxoutmlp(28 * 28, h1, hpiece1, h2, hpiece2, 10, lr, C, 0.2, 0.5)
        cvError = maxout.cv(trX, trY)
        cvErrorList.append(copy(cvError))
    optIndex = np.argmin(cvErrorList, axis=0)
    lr, C = params[optIndex]
    print 'retraining', params[optIndex]
    maxout = CMaxoutmlp(28 * 28, h1, hpiece1, h2, hpiece2, 10, lr, C, 0.2, 0.5)
    maxout.trainmaxout(trX, teX, trY, teY)
def main():
    f = open('output.txt', 'w')

    train = np.genfromtxt('data/train_data.txt')
    test = np.genfromtxt('data/test_data.txt')
    val = np.genfromtxt('data/val_data.txt')

    # Parse data, separating training labels from the dataset
    n_feat = train[0].size
    train_data = train[:, :-1]
    print len(train_data)
    #print train_data[0]
    train_labels = train[:, n_feat - 1]
    print train_labels[0]
    test_data = test[:, :-1]
    test_labels = test[:, n_feat - 1]
    val_data = val[:, :-1]
    val_labels = val[:, n_feat - 1]

    trX, teX, trY, teY = mnist(onehot=False)
    print trY[0]
    train_data = trX[:1000]
    train_labels = trY[:1000]
    test_labels = teY
    test_data = teX

    SlistX = []
    SlistY = []
    SlistX.append(trX[0])
    SlistY.append(trY[0])

    # Print training + validation error for the classifier
    k_neighbors = [1]
    for k in k_neighbors:
        preds_train = knn_algorithm(SlistX, SlistY, train_data, k, train_labels)
        print len(SlistX)
        preds_val = knn_algorithm(train_data, train_labels, val_data, k)
        preds_test = knn_algorithm(train_data, train_labels, test_data, k)

        f.write("%s-neighbors: \n" % k)
        f.write("Training error: %s \n" % (calc_error(train_labels, preds_train)))
        f.write("Validation error: %s \n" % (calc_error(val_labels, preds_val)))
        f.write("Test error: %s \n" % (calc_error(test_labels, preds_test)))
        f.write("\n")
def run_regression(trials, batch_size):
    train_x, test_x, train_y, test_y = mnist(onehot=True)
    x_dim = len(train_x[0])
    y_dim = len(train_y[0])

    weight_vec = theano.shared(np.random.randn(x_dim, y_dim) * INITIAL_WEIGHT_MAX)
    offset = theano.shared(np.zeros(y_dim))

    input_data = T.fmatrix('input_data')
    label = T.fmatrix('label')

    softmax_output = T.nnet.softmax(T.dot(input_data, weight_vec) + offset)
    cost = T.mean(T.nnet.categorical_crossentropy(softmax_output, label))

    weight_grad, offset_grad = T.grad(cost=cost, wrt=[weight_vec, offset])
    updates = [[weight_vec, weight_vec - weight_grad * LEARNING_RATE],
               [offset, offset - offset_grad * LEARNING_RATE]]

    train_f = theano.function(inputs=[input_data, label], outputs=cost, updates=updates,
                              allow_input_downcast=True)

    predicted_label = T.argmax(softmax_output, axis=1)
    output_f = theano.function(inputs=[input_data], outputs=predicted_label,
                               allow_input_downcast=True)

    for i in range(trials):
        for start, end in zip(range(0, len(train_x), batch_size),
                              range(batch_size, len(train_x), batch_size)):
            cost = train_f(train_x[start:end], train_y[start:end])
        print i, np.mean(np.argmax(test_y, axis=1) == output_f(test_x))
def run_net(num_iters, batch_size):
    train_input, test_input, train_output, test_output = mnist(onehot=True)

    # reshape image vectors into 4-tensor format for convolution
    train_input = train_input.reshape(-1, 1, 28, 28)
    test_input = test_input.reshape(-1, 1, 28, 28)

    input_sym = T.ftensor4()
    output_sym = T.fmatrix()

    w = init_weights((8, 1, 5, 5))    # 8 filters of size 5 X 5
    w2 = init_weights((8, 8, 3, 3))   # 8 filters of size 3 X 3
    w3 = init_weights((392, 625))     # fully connected layer
    w_o = init_weights((625, 10))     # fully connected output layer

    # accumulator variables for RMS prop
    acc_w = init_weights((8, 1, 5, 5), initial_max_val=0.0)
    acc_w2 = init_weights((8, 8, 3, 3), initial_max_val=0.0)
    acc_w3 = init_weights((392, 625), initial_max_val=0.0)
    acc_wo = init_weights((625, 10), initial_max_val=0.0)

    noise_act_1_pooled, noise_act_2_flattened, noise_l4, noise_net_output = \
        forward_prop(input_sym, w, w2, w3, w_o, 0.2, 0.5)
    act_1_pooled, act_2_flattened, l4, net_output = \
        forward_prop(input_sym, w, w2, w3, w_o, 0., 0.)

    prediction = T.argmax(net_output, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_net_output, output_sym))
    params = [w, w2, w3, w_o]
    accs = [acc_w, acc_w2, acc_w3, acc_wo]
    updates = rms_prop(cost, params, accs, lr=0.001)

    train = theano.function(inputs=[input_sym, output_sym], outputs=cost, updates=updates,
                            allow_input_downcast=True)
    predict = theano.function(inputs=[input_sym], outputs=prediction, allow_input_downcast=True)

    for i in range(num_iters):
        for batch_start in range(0, len(train_input) - batch_size, batch_size):
            cost = train(train_input[batch_start:batch_start + batch_size],
                         train_output[batch_start:batch_start + batch_size])
        print np.mean(np.argmax(test_output, axis=1) == predict(test_input))
def read_data_sets(data_dir='/data/datasets/', dtype=dtypes.float32, reshape=True,
                   validation_size=1000):
    train_images, test_images, train_labels, test_labels = mnist(data_dir)

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images, validation_labels, dtype=dtype, reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
def compute_grads_and_weights_mnist(A_indices, segment, L_measurements,
                                    ntrain=50000, ntest=10000, mb_size=128, nhidden=625):
    trX, teX, trY, teY = mnist(ntrain=ntrain, ntest=ntest, onehot=True)
    seq = mnist_with_noise([trX, trY], 0)

    X = T.fmatrix()
    Y = T.fmatrix()

    w_h = [init_weights((784, nhidden)), init_weights((nhidden, 10))]

    py_x = model(X, w_h)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    params = w_h
    updates = sgd(cost, params)
    grads = T.grad(cost=cost, wrt=params)
    grad_for_norm = T.grad(cost=cost, wrt=params)

    train = theano.function(inputs=[X, Y], outputs=[cost, grads[0], grads[1]], updates=updates,
                            allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=[y_x, py_x], allow_input_downcast=True)
    get_grad = theano.function(inputs=[X, Y], outputs=[cost, grad_for_norm[0], grad_for_norm[1]],
                               allow_input_downcast=True)

    for i in range(1):
        for start, end in zip(range(0, len(trX), mb_size), range(mb_size, len(trX), mb_size)):
            cost, grads0, grads1 = train(trX[start:end], trY[start:end])
        y, p_y = predict(teX)
        print np.mean(np.argmax(teY, axis=1) == y)
    if sparsity_ind == False:
        cost = -T.mean(T.sum(target_y * T.log(pred_y) + (1 - target_y) * T.log(1 - pred_y), axis=1))
    else:
        cost = -T.mean(T.sum(target_y * T.log(pred_y) + (1 - target_y) * T.log(1 - pred_y), axis=1)) \
            + penalty * T.shape(h)[1] * (sparsity * T.log(sparsity) + (1 - sparsity) * T.log(1 - sparsity)) \
            - penalty * sparsity * T.sum(T.log(T.mean(h, axis=0) + 1e-6)) \
            - penalty * (1 - sparsity) * T.sum(T.log(1 - T.mean(h, axis=0) + 1e-6))
    return cost

trX, teX, trY, teY = mnist()

# use all data
#trX, trY = trX[:1000], trY[:1000]
#teX, teY = teX[:200], teY[:200]

x = T.fmatrix('x')
d = T.fmatrix('d')

rng = np.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2**30))

# Initialise weights and bias for auto-encoder
# Encoding layers
W1 = init_weights(28 * 28, 900)
b1 = init_bias(900)
W2 = init_weights(900, 625)
b2 = init_bias(625)
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))

def model(X, w):
    return T.nnet.softmax(T.dot(X, w))

start_time = time.time()

##################################################
data_folder = 'truncated_data'  ##################
dimension = 331                 ##################
##################################################

trX, teX, trY, teY = mnist(onehot=True,
                           datasets_dir=os.getcwd() + '/' + data_folder + '_' + str(dimension) + '/',
                           dimension=dimension)

X = T.fmatrix()
Y = T.fmatrix()

w = init_weights((int(math.pow(dimension, 2)), 2))

py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
gradient = T.grad(cost=cost, wrt=w)
update = [[w, w - gradient * 0.0001]]

train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)
import theano
import theano.tensor as T
import numpy as np
from load import mnist

# takes care of conversions to make your stuff theano-friendly: float32 or float64
def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))  # initialize to some gaussian

# our model in matrix format
def model(X, w):
    return T.nnet.softmax(T.dot(X, w))

# training matrices
trX, teX, trY, teY = mnist(onehot=True)  # one hot encoding!

X = T.fmatrix()
Y = T.fmatrix()

w = init_weights((784, 10))

# probability of the labels, given the input
py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1)

# categorical cross entropy is basically telling us to maximize the value that's true
cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
gradient = T.grad(cost=cost, wrt=w)
update = [[w, w - gradient * 0.05]]
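# A minimal continuation sketch (not part of the original snippet): compiling the
# update rule above into train/predict functions and running a mini-batch loop,
# following the conventions the other snippets in this collection use.
train = theano.function(inputs=[X, Y], outputs=cost, updates=update,
                        allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

for i in range(100):
    for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
        train(trX[start:end], trY[start:end])
    print np.mean(np.argmax(teY, axis=1) == predict(teX))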
def initialize_mnist(self):
    self.trX, self.teX, self.trY, self.teY = mnist(onehot=True)
    self.trX = self.trX.reshape(-1, 1, 28, 28)
    self.teX = self.teX.reshape(-1, 1, 28, 28)
    self.set_weights("mnist")
X_hat = model(X, weights_layer_1, weights_layer_y, bias_layer_1, bias_layer_y)

# Loss
L = -T.sum(X * T.log(X_hat) + (1 - X) * T.log(1 - X_hat), axis=1)
loss = T.mean(L)

# Parameter Updating
params = [weights_layer_y, bias_layer_y, weights_layer_1, bias_layer_1]
updates = sgd(loss, params, lr=learning_rate)

# Compiling
train = theano.function(inputs=[X], outputs=loss, updates=updates,
                        allow_input_downcast=True, on_unused_input='warn')
predict = theano.function(inputs=[X], outputs=X_hat, allow_input_downcast=True)

# Load the data
trX, teX, trY, teY = mnist(scale_data=False)
n_batches_train = trX.shape[0] / batch_size

# Training
for i in range(epochs):
    cost_per_batch = np.zeros(n_batches_train)
    pred_cost_per_batch = np.zeros(n_batches_train)
    cost = []
    for start, end in zip(range(0, n_batches_train * batch_size, batch_size),
                          range(batch_size, n_batches_train * batch_size, batch_size)):
        cost.append(train(trX[start:end]))
    print "Epoch number {0}".format(i)
    print 'Mean Cost per Batch %s' % str(i), np.mean(cost)
    lr = lr_0 * np.exp(-n / float(n_epochs_organizing_phase))
    if lr < lr_min:
        return lr_min
    else:
        return lr

def init_neighborhood_size(map_shape):
    m, n = map_shape
    sigma_0 = np.sqrt(m**2 + n**2) / 2.
    return sigma_0

def init_timeconstant(n_epochs_organizing_phase, sigma_0):
    return float(n_epochs_organizing_phase) / np.log(sigma_0)

trX, teX, trY, teY = mnist(ntrain=60000, ntest=10000, onehot=False)
xmin_val = trX[0].min()
xmax_val = trX[0].max()

def remove_threes_and_fours(X, Y):
    """
    Y: array-like, shape (n_examples,)
    """
    three_idxs = np.where(Y == 3)
    four_idxs = np.where(Y == 4)
    ia = np.indices(Y.shape)
    remaining_idxs = np.setxor1d(ia, np.concatenate((three_idxs[0], four_idxs[0])))
    return X[remaining_idxs], Y[remaining_idxs]

if raw_input('remove_classes 3 and 4? (y/n)') == 'y':
    trX, trY = remove_threes_and_fours(trX, trY)
    teX, teY = remove_threes_and_fours(teX, teY)
def conv_nn():
    # mnist dataset, training + test
    trX, teX, trY, teY = mnist(onehot=True)
    trX = trX.reshape(-1, 1, 28, 28)
    teX = teX.reshape(-1, 1, 28, 28)

    X = T.ftensor4()
    Y = T.fmatrix()

    # init the weights
    # isotropic filters: (5,5)
    # anisotropic filters: (6,3) and (3,6)
    w_1 = init_weights((32, 1, 5, 5))
    w_2 = init_weights((64, 32, 5, 5))
    w_3 = init_weights((128, 64, 2, 2))

    # number of pixels in last conv layer:
    # for isotropic filters: num_filter * pix_per_filter = 128 * 9 = 1152
    # for anisotropic filters: num_filter * pix_per_filter = 128 * 8 = 1024
    w_h2 = init_weights((1152, 625))
    w_o = init_weights((625, 10))

    noise_out_1, noise_out_2, noise_out_3, noise_h2, noise_py_x = model_conv(
        X, w_1, w_2, w_3, w_h2, w_o, 0.8, 0.5)
    out_1, out_2, out_3, h2, py_x = model_conv(X, w_1, w_2, w_3, w_h2, w_o, 1., 1.)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    params = [w_1, w_2, w_3, w_h2, w_o]
    updates = RMSprop(cost, params, lr=0.001)

    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates,
                            allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

    f_out = open('res/res_conv_aniso_nn', 'w')

    for i in range(100):  # you can adjust this if training takes too long
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            cost = train(trX[start:end], trY[start:end])
        print i
        correct = np.mean(np.argmax(teY, axis=1) == predict(teX))
        res = str(i) + " " + str(correct) + "\n"
        f_out.write(res)

    print 'Result Conv:', correct

    # save the filters of the first layer
    fweights = w_1.get_value()
    for i in range(fweights.shape[0]):
        fname = 'res/filters_aniso/filter_' + str(i)
        np.save(fname, fweights[i, 0, :, :])
print("Sending index ", i2, "from server to worker", process) process += 1 for process in range(1, size): comm.isend(-1, dest=process, tag=1) def floatX(X): return np.asarray(X, dtype=theano.config.floatX) def init_weights(shape): return theano.shared(floatX(np.random.randn(*shape) * 0.01)) def model(X, w): return T.nnet.softmax(T.dot(X, w)) trX, teX, trY, teY = mnist(onehot=True) X = T.fmatrix() Y = T.fmatrix() w = init_weights((784, 10)) w0 = theano.shared(value=np.zeros((784, 10), dtype=theano.config.floatX)) proc = 1 w_tilde = w.copy() #broadcast w_tilde while proc < size: comm.isend(w_tilde, dest=proc, tag=2) proc += 1 for proc in range(1, size): comm.isend(-1, dest=proc, tag=2)
def main():
    trX, teX, trY, teY = mnist(onehot=True)
    trX = trX.reshape(-1, 1, 28, 28)
    teX = teX.reshape(-1, 1, 28, 28)

    dtype0 = 'float32'
    dtype1 = 'float64'

    X = T.ftensor4()
    Y = T.fmatrix()

    w = init_weights((32, 1, 3, 3), dtype0)
    w2 = init_weights((64, 32, 3, 3), dtype0)
    w3 = init_weights((128, 64, 3, 3), dtype0)
    w4 = init_weights((128 * 3 * 3, 625), dtype0)
    w_o = init_weights((625, 10), dtype0)

    trX, trY, X, Y = cast_4(trX, trY, X, Y, dtype0)

    noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, w_o, 0.2, 0.5, dtype0)
    l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, w_o, 0., 0., dtype0)
    y_x = T.argmax(py_x, axis=1)

    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    params = [w, w2, w3, w4, w_o]
    updates = RMSprop(cost, params, dtype0, lr=0.001)

    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

    # train_model with mini-batch training
    for i in range(1):
        start_time = time.time()
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            print(start, ' ', end)
            cost = train(trX[start:end], trY[start:end])
        print("--- %s seconds ---" % (time.time() - start_time))
        pred_teX = predict(teX)
        print(np.mean(np.argmax(teY, axis=1) == pred_teX))

    w = theano.shared(value=np.asarray(w.eval(), dtype=dtype1), name='w', borrow=True)
    w2 = theano.shared(value=np.asarray(w2.eval(), dtype=dtype1), name='w2', borrow=True)
    w3 = theano.shared(value=np.asarray(w3.eval(), dtype=dtype1), name='w3', borrow=True)
    w4 = theano.shared(value=np.asarray(w4.eval(), dtype=dtype1), name='w4', borrow=True)
    w_o = theano.shared(value=np.asarray(w_o.eval(), dtype=dtype1), name='w_o', borrow=True)

    trX, trY, X, Y = cast_4(trX, trY, X, Y, dtype1)

    noise_l1 = T.cast(noise_l1, dtype=dtype1)
    noise_l2 = T.cast(noise_l2, dtype=dtype1)
    noise_l3 = T.cast(noise_l3, dtype=dtype1)
    noise_l4 = T.cast(noise_l4, dtype=dtype1)
    noise_py_x = T.cast(noise_py_x, dtype=dtype1)
    l1 = T.cast(l1, dtype=dtype1)
    l2 = T.cast(l2, dtype=dtype1)
    l3 = T.cast(l3, dtype=dtype1)
    l4 = T.cast(l4, dtype=dtype1)

    for i in range(1):
        start_time = time.time()
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
            print(start, ' ', end)
            cost = train(trX[start:end], trY[start:end])
        print("--- %s seconds ---" % (time.time() - start_time))
        pred_teX = predict(teX)
        print(np.mean(np.argmax(teY, axis=1) == pred_teX))

    print("Finished!")
def model(X, w_h, w_o):
    h = T.nnet.sigmoid(T.dot(X, w_h))
    py_x = T.nnet.softmax(T.dot(h, w_o))
    return py_x

def sgd(cost, params, learning_rate=0.05):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        updates.append([p, p - g * learning_rate])
    return updates

trainX, testX, trainY, testY = load.mnist(onehot=True)

X = T.fmatrix()
Y = T.fmatrix()

hidden_layer_size = 625
w_h = init_weights(shape=(trainX.shape[1], hidden_layer_size))
w_o = init_weights(shape=(hidden_layer_size, trainY.shape[1]))

py_x = model(X, w_h, w_o)
y_prediction = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
params = [w_h, w_o]
updates = sgd(cost, params)
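# A possible continuation (a sketch, not from the original file): compile the SGD
# updates above into train/predict functions and run mini-batch training, assuming
# theano and numpy (as np) are imported alongside load.
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates,
                        allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_prediction, allow_input_downcast=True)

for epoch in range(100):
    for start, end in zip(range(0, len(trainX), 128), range(128, len(trainX), 128)):
        train(trainX[start:end], trainY[start:end])
    print epoch, np.mean(np.argmax(testY, axis=1) == predict(testX))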
import numpy as np
from sklearn.linear_model import LogisticRegression
import load
import convnet

if __name__ == "__main__":
    print("\nLoading Data...")
    load_activations = convnet.ConvolutionalNeuralNetwork().load_data
    num_chunks = 20
    trAs = [load_activations("saved/trA{0:02d}.txt".format(i), (60000 / num_chunks, 625))
            for i in range(num_chunks)]
    trA = np.concatenate(trAs)
    print("trA.shape: {0}".format(trA.shape))
    teA = load_activations("saved/teA.txt", (10000, 625))
    print("teA.shape: {0}".format(teA.shape))
    trX, teX, trY, teY = load.mnist(onehot=True)
    trC = np.argmax(trY, axis=1)
    print("trC.shape: {0}".format(trC.shape))
    teC = np.argmax(teY, axis=1)
    print("teC.shape: {0}".format(teC.shape))
    print("Done.")

    print("\nCreating Regression Model...")
    lr = LogisticRegression()
    lr.fit(trA, trC)
    print("Done.")

    print("\nAnalyzing Training Data...")
    predictions = lr.predict(trA)
    print("predictions.shape: {0}".format(predictions.shape))
    accuracy = np.mean(predictions == trC)
        w = np.swapaxes(w, 0, 1)
        w = w.reshape(w.shape[0], 1, dim, dim)
        print(dim)
        print(w.shape)
        Plots.plot_filters(w, 1, idx, "layer" + str(i + 1))
    return

def plotter(samples, predictions, Ws, img_x, idx):
    plot_all_filters(Ws, idx)
    shp = (samples.shape[0], 1, img_x, img_x)
    samples = samples.reshape(shp)
    predictions = predictions.reshape(shp)
    Plots.plot_predictions_grid(samples, predictions, idx, shp)
    return

trX, trY, teX, teY, channels, img_x = mnist(onehot=True)
trX = trX.reshape(trX.shape[0], 784)
teX = teX.reshape(teX.shape[0], 784)

X = T.fmatrix()

layers = [784, 100, 10, 100, 784]
Ws = get_params(layers)

noise_out = model(X, Ws, 0.2, 0.5)
clean_out = model(X, Ws, 0., 0.)

noise_L = T.sum((X - noise_out)**2, axis=1)
noise_cost = noise_L.mean()
clean_L = T.sum((X - clean_out)**2, axis=1)
clean_cost = clean_L.mean()
import theano
import theano.tensor as T
import numpy as np
import time
from load import mnist

def floatX(X):
    # convert to correct dtype
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    # initialize model parameters
    return theano.shared(floatX(np.random.randn(*shape) * 0.001), borrow=True)

def model(X, w):
    # model in matrix format
    return T.nnet.softmax(T.dot(X, w))

trX, teX, trY, teY = mnist(onehot=True)  # loading data matrices

X = T.fmatrix()
Y = T.fmatrix()

w = init_weights((784, 10))  # 784 = 28 * 28 size

py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1)  # probability outputs and maxima predictions

cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))  # classification metric to optimize
gradient = T.grad(cost=cost, wrt=w)
updates = [[w, w - 0.05 * gradient]]

train = theano.function(inputs=[X, Y], outputs=[cost, y_pred], updates=updates,
                        allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)
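# A possible mini-batch training loop (a sketch, not part of the original excerpt),
# following the 128-example batching used by the other snippets in this collection.
for i in range(100):
    for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
        batch_cost, batch_pred = train(trX[start:end], trY[start:end])
    print i, np.mean(np.argmax(teY, axis=1) == predict(teX))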
# 1 encoder, decoder and a softmax layer
def init_weights(n_visible, n_hidden):
    initial_W = np.asarray(
        np.random.uniform(
            low=-4 * np.sqrt(6. / (n_hidden + n_visible)),
            high=4 * np.sqrt(6. / (n_hidden + n_visible)),
            size=(n_visible, n_hidden)),
        dtype=theano.config.floatX)
    return theano.shared(value=initial_W, name='W', borrow=True)

def init_bias(n):
    return theano.shared(value=np.zeros(n, dtype=theano.config.floatX), borrow=True)

trX, teX, trY, teY = mnist()
trX, trY = trX[:12000], trY[:12000]
teX, teY = teX[:2000], teY[:2000]

x = T.fmatrix('x')
d = T.fmatrix('d')

rng = np.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2 ** 30))

corruption_level = 0.1
training_epochs = 25
learning_rate = 0.1
batch_size = 128
# SGD with momentum and weight decay
def sgd_momentum(cost, params, lr=0.1, decay=0.0001, momentum=0.1):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        v = theano.shared(p.get_value() * 0.)  # velocity, initialised to zero
        v_new = momentum * v - (g + decay * p) * lr
        updates.append([p, p + v_new])
        updates.append([v, v_new])
    return updates

trX, teX, trY, teY = mnist()
trX, trY = trX[:12000], trY[:12000]
teX, teY = teX[:2000], teY[:2000]

x = T.fmatrix('x')
d = T.fmatrix('d')

rng = np.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2**30))

corruption_level = 0.1
training_epochs = 5
learning_rate = 0.1
batch_size = 128
beta = 0.5
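# A minimal usage sketch for sgd_momentum (not part of the original file): wiring it
# into a one-hidden-layer denoising autoencoder, assuming init_weights(n_visible, n_hidden)
# and init_bias(n) helpers like those in the snippet above are available.
W1 = init_weights(28 * 28, 900)
b1 = init_bias(900)
b1_prime = init_bias(28 * 28)

# corrupt the input, encode, then decode with tied weights
tilde_x = theano_rng.binomial(size=x.shape, n=1, p=1 - corruption_level,
                              dtype=theano.config.floatX) * x
h1 = T.nnet.sigmoid(T.dot(tilde_x, W1) + b1)
z1 = T.nnet.sigmoid(T.dot(h1, W1.T) + b1_prime)
cost1 = -T.mean(T.sum(x * T.log(z1) + (1 - x) * T.log(1 - z1), axis=1))

train_da = theano.function(inputs=[x], outputs=cost1,
                           updates=sgd_momentum(cost1, [W1, b1, b1_prime]),
                           allow_input_downcast=True)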
def initialize_mnist(self):
    self.trX, self.teX, self.trY, self.teY = mnist(onehot=True)
    self.w_h = self.init_weights((784, 625))
    self.w_h2 = self.init_weights((625, 625))
    self.w_o = self.init_weights((625, 10))
data_nn = np.loadtxt('res/res_simple_nn')
data_dropout = np.loadtxt('res/res_dropout_nn')
data_prelu = np.loadtxt('res/res_prelu_nn')
data_conv = np.loadtxt('res/res_conv_nn')
data_aniso = np.loadtxt('res/res_conv_aniso_nn')

# isotropic filters
#filter0 = np.load('res/filter/filter_0.npy')
#filter1 = np.load('res/filter/filter_15.npy')
#filter2 = np.load('res/filter/filter_30.npy')

# anisotropic filters
filter0 = np.load('res/filters_aniso/filter_0.npy')
filter1 = np.load('res/filters_aniso/filter_15.npy')
filter2 = np.load('res/filters_aniso/filter_30.npy')

trX, teX, trY, teY = mnist(ntrain=10, ntest=10)

#plot_trainerror(data_nn, data_dropout, data_prelu, data_conv)
#plot_single(data_aniso)

plot_greyscale(filter0, 'filter0')
im = trX[5].reshape((28, 28))
conv = convolve(im, filter0)
plot_greyscale(conv, 'convolution')

plot_greyscale(filter1, 'filter15')
conv = convolve(im, filter1)
plot_greyscale(conv, 'convolution')

plot_greyscale(filter2, 'filter30')
conv = convolve(im, filter2)
plot_greyscale(conv, 'convolution')
def model(X, w1, s1, theta1, t1, w2, s2, theta2, t2, D, G, p_drop_lista, p_drop_hidden):
    a1 = L.lista(X, w1, s1, theta1, t1)
    a1 = L.dropout(a1, p_drop_lista)
    y = T.dot(a1, G)
    y = L.dropout(y, p_drop_hidden)
    a2 = L.lista(y, w2, s2, theta2, t2)
    a2 = L.dropout(a2, p_drop_lista)
    x_o = T.dot(a2, D)
    return a1, y, a2, x_o

trX, teX, _, trY, teY, _ = mnist(onehot=False)

IM_SIZE = 28 * 28
Y_SIZE = 50
BATCH_SIZE = 256
t1 = 3
t2 = 3

X = T.fmatrix()
Y = T.fmatrix()

dict_size = IM_SIZE * 4

LOAD_OLD = True
if not LOAD_OLD:
    w1 = L.init_weights([IM_SIZE, dict_size])
    s1 = L.init_weights([dict_size, dict_size])
    theta1 = L.init_weights([dict_size])
def main_loop():
    trX, teX, trY, teY = mnist(ntrain=ntrain, ntest=2000, onehot=True)
    print "before", trX[30][0:10]
    seq = mnist_with_noise([trX, trY], 10)
    print "after", trX[30][0:10]

    X = T.fmatrix()
    Y = T.fmatrix()
    #grads = T.fvector()

    w_h = [init_weights((784, 625)), init_weights((625, 10))]

    py_x = model(X, w_h)
    y_x = T.argmax(py_x, axis=1)
    pre_softmax = get_pre_softmax_func(X, w_h)

    cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    params = w_h
    updates = sgd(cost, params)
    grads = T.grad(cost=cost, wrt=params)
    grad_for_norm = T.grad(cost=cost, wrt=params)

    train = theano.function(inputs=[X, Y], outputs=[cost, grads[0], grads[1]], updates=updates,
                            allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=[y_x, py_x], allow_input_downcast=True)
    get_grad = theano.function(inputs=[X, Y],
                               outputs=[cost, grad_for_norm[0], grad_for_norm[1], pre_softmax],
                               allow_input_downcast=True)
    #get_pre_softmax = theano.function([X],)

    mb_size = 128
    for i in range(1):
        grad_list = []
        for start, end in zip(range(0, len(trX), mb_size), range(mb_size, len(trX), mb_size)):
            cost, grads0, grads1 = train(trX[start:end], trY[start:end])
        y, p_y = predict(teX)
        print np.mean(np.argmax(teY, axis=1) == y)

    noisy_grads = []
    normal_grads = []
    noisy_cost = []
    normal_cost = []
    mb_size = 1
    n_predicts = 0
    noisy_pre_softmax_norm = []
    normal_pre_softmax_norm = []
    for i in seq:
        cost, grad0, grad1, pre_soft = get_grad(trX[i:i+1], trY[i:i+1])
        norm = np.linalg.norm(grad0)
        y, py = predict(trX[i:i+1])
        if i < 0.1 * ntrain:
            n_predicts += (np.argmax(trY[i:i+1], axis=1) == y)
            noisy_grads.append(norm)
            noisy_cost.append(cost)
            noisy_pre_softmax_norm.append(np.linalg.norm(pre_soft))
        else:
            normal_grads.append(norm)
            normal_cost.append(cost)
            normal_pre_softmax_norm.append(np.linalg.norm(pre_soft))

    print "noisy grad : mean,var - ", np.mean(noisy_grads), np.var(noisy_grads)
    print "normal grad: mean,var - ", np.mean(normal_grads), np.var(normal_grads)
    print "noisy cost : mean,var - ", np.mean(noisy_cost), np.var(noisy_cost)
    print "normal cost: mean,var - ", np.mean(normal_cost), np.var(normal_cost)
    print "noisy pre_softmax norm : mean,var - ", np.mean(noisy_pre_softmax_norm), np.var(noisy_pre_softmax_norm)
    print "normal pre softmax norm : mean,var - ", np.mean(normal_pre_softmax_norm), np.var(normal_pre_softmax_norm)
    print " noisy predicts out of 5000 -", n_predicts

    plt.plot(noisy_grads)
    plt.plot(normal_grads)
    plt.savefig('grad0.jpeg')
def main(self):
    # Load training and test data
    training_x, test_x, training_y, test_y = load.mnist(onehot=True)

    # Symbolic variables
    x = tensor.fmatrix()
    y = tensor.fmatrix()

    # Initialize weights
    temp_weight = INPUT_SIZE
    weights = []
    for layer in self.layer_sizes:
        weight = self.init_weights((temp_weight, layer))
        weights.append(weight)
        temp_weight = layer
    weight = self.init_weights((temp_weight, OUTPUT_SIZE))
    weights.append(weight)

    # Initialize model
    model_layer_noise = self.model2(x, weights, 0.2, 0.5)
    model_layer_values = self.model2(x, weights, 0., 0.)
    y_x = tensor.argmax(model_layer_values[-1], axis=1)

    # Initialize the update function
    cost = tensor.mean(tensor.nnet.categorical_crossentropy(model_layer_noise[-1], y))
    params = weights
    updates = self.RMSprop(cost, params)  # SGD / RMSprop

    # Initialize core functionality
    train = theano.function(inputs=[x, y], outputs=cost, updates=updates,
                            allow_input_downcast=True)
    self.predict = theano.function(inputs=[x], outputs=y_x, allow_input_downcast=True)

    # Training on mnist
    print("\nTRAINING...")
    for i in range(NUMBER_OF_RUNS):
        for start, end in zip(range(0, len(training_x), BATCH_SIZE),
                              range(BATCH_SIZE, len(training_x), BATCH_SIZE)):
            cost = train(training_x[start:end], training_y[start:end])
        print("Iteration ", i + 1, "/", NUMBER_OF_RUNS, "(",
              numpy.mean(numpy.argmax(test_y, axis=1) == self.predict(test_x)) * 100, ")")

    # Testing
    print("\nTESTING ON: MNIST TRAINING SET...")
    print(numpy.mean(numpy.argmax(training_y, axis=1) == self.predict(training_x)) * 100,
          "percent correct")
    print("\nTESTING ON: MNIST TEST SET...")
    print(numpy.mean(numpy.argmax(test_y, axis=1) == self.predict(test_x)) * 100,
          "percent correct")
reconstruction_mse = T.dot(reconstruction_dims.T, (X - X_hat)**2) / T.sum(T.neq(reconstruction_dims, 0.))
prediction_mse = T.dot(prediction_dims.T, (X - X_hat)**2) / T.sum(T.neq(prediction_dims, 0.))

# Parameter Updating
params = [weights_layer_1, weights_layer_mu, weights_layer_sig, weights_layer_2, weights_layer_y,
          bias_layer_1, bias_layer_mu, bias_layer_sig, bias_layer_2, bias_layer_y]
updates = RMSprop(-L, params, lr=learning_rate)

# Compiling
train = theano.function(inputs=[X, epsilon], outputs=[L, log_lik, D_KL], updates=updates,
                        allow_input_downcast=True)
predict = theano.function(inputs=[X, epsilon],
                          outputs=[model_input, X_hat, T.mean(reconstruction_mse), T.mean(prediction_mse)],
                          allow_input_downcast=True)
crop = theano.function(inputs=[X], outputs=crop_X, allow_input_downcast=True)

# Load the data
trX, teX, trY, teY = mnist(scale_data=False, add_noise=False, add_pattern=False)
n_batches_train = trX.shape[0] / batch_size

for i in range(epochs):
    L_per_batch = np.zeros(n_batches_train)
    log_lik_per_batch = np.zeros(n_batches_train)
    D_KL_per_batch = np.zeros(n_batches_train)
    batch_num = 0
    for start, end in zip(range(0, n_batches_train * batch_size, batch_size),
                          range(batch_size, n_batches_train * batch_size, batch_size)):
        e = np.random.normal(0, 1, (batch_size, n_z))
        batch_L, batch_log_lik, batch_D_KL = train(trX[start:end], e)
        L_per_batch[batch_num] = batch_L
import numpy as np
from sklearn.linear_model import LogisticRegression
import load
import convnet

if __name__ == "__main__":
    print("\nLoading Data...")
    load_activations = convnet.ConvolutionalNeuralNetwork().load_data
    num_chunks = 20
    trAs = [
        load_activations("saved/trA{0:02d}.txt".format(i), (60000 / num_chunks, 625))
        for i in range(num_chunks)
    ]
    trA = np.concatenate(trAs)
    print("trA.shape: {0}".format(trA.shape))
    teA = load_activations("saved/teA.txt", (10000, 625))
    print("teA.shape: {0}".format(teA.shape))
    trX, teX, trY, teY = load.mnist(onehot=True)
    trC = np.argmax(trY, axis=1)
    print("trC.shape: {0}".format(trC.shape))
    teC = np.argmax(teY, axis=1)
    print("teC.shape: {0}".format(teC.shape))
    print("Done.")

    print("\nCreating Regression Model...")
    lr = LogisticRegression()
    lr.fit(trA, trC)
    print("Done.")

    print("\nAnalyzing Training Data...")
    predictions = lr.predict(trA)
    print("predictions.shape: {0}".format(predictions.shape))
    accuracy = np.mean(predictions == trC)
# Compiling
train = theano.function(inputs=[X, epsilon], outputs=[L, log_lik, D_KL], updates=updates,
                        allow_input_downcast=True)
predict = theano.function(inputs=[X, epsilon],
                          outputs=[model_input, X_hat, T.mean(reconstruction_mse), T.mean(prediction_mse)],
                          allow_input_downcast=True)

# Load the data
trX, teX, trY, teY = mnist(scale_data=False)
n_batches_train = trX.shape[0] / batch_size

for i in range(epochs):
    L_per_batch = np.zeros(n_batches_train)
    log_lik_per_batch = np.zeros(n_batches_train)
    D_KL_per_batch = np.zeros(n_batches_train)
    batch_num = 0
    for start, end in zip(range(0, n_batches_train * batch_size, batch_size),
                          range(batch_size, n_batches_train * batch_size, batch_size)):
        e = np.random.normal(0, 1, (batch_size, n_z))
        batch_L, batch_log_lik, batch_D_KL = train(trX[start:end], e)
        L_per_batch[batch_num] = batch_L
        log_lik_per_batch[batch_num] = batch_log_lik
from svdResNet import svdResNet
from load import mnist
import sys

def usageAndExit():
    print "Usage: python run_mlp.py [Net]\n Net=svdMLP/MLP"
    sys.exit(0)

if len(sys.argv) < 2:
    usageAndExit()
netType = sys.argv[1]

trX, teX, trY, teY = mnist(onehot=False)
print "Training data: ", trX.shape, trY.shape
print "Test data: ", teX.shape, teY.shape

n_in = 28 * 28
n_out = 10
n_h = 128
n_r = 16
n_layers = 10
m = 0.1
num_epoch = 1  # 50
batchsize = 1000
validation_int = 100
learning_rate = 0.0001
    test_accuracy = test_acc / test_batches
    print("  " + prefix + " loss:\t\t{:.6f}\t{:.6f}".format(average_test_score, class_err))
    print("  " + prefix + " accuracy:\t{:.6f} %".format(test_accuracy * 100))
    return average_test_score, test_accuracy, wrong_samples, wrong_classification

# -----------------------LOAD IMAGES AND LABELS----------------------------#
print('Loading data')
# Load index of labeled images in train set
with open(os.path.join(DATA_PATH, 'labeled_index.pkl'), 'r') as f:
    labeled_idx = pickle.load(f)
# Load image and label of train, validation, test set
trX, vlX, teX, trY, vlY, teY = mnist(onehot=True, normalize_axes=None, ndim=2)
IM_SIZE = trX.shape[1]

# with open('../data/pca_model.pkl','r') as f:
#     pca = pickle.load(f)
# trX = pca.transform(trX)
# vlX = pca.transform(vlX)
# teX = pca.transform(teX)

# -----------------------SET PARAMETERS-------------------------#
losses_ratio = run_parameters.losses_ratio
supervised_cost_fun = run_parameters.supervised_cost_fun

# -----------------------CREATE RUN FUNCTIONS------------------#
# Creating the computation graph
print('Building computation graph')
input_var = T.fmatrix('input_var')
import numpy as np
from load import mnist

trX, vlX, _, _, _, _ = mnist()
trX = np.concatenate((trX, vlX))
    l2a = rectify(conv2d(l1, w2))
    l2 = max_pool_2d(l2a, (2, 2))
    l2 = dropout(l2, p_drop_conv)

    l3a = rectify(conv2d(l2, w3))
    l3b = max_pool_2d(l3a, (2, 2))
    l3 = T.flatten(l3b, outdim=2)
    l3 = dropout(l3, p_drop_conv)

    l4 = rectify(T.dot(l3, w4))
    l4 = dropout(l4, p_drop_hidden)

    pyx = softmax(T.dot(l4, w_o))
    return l1, l2, l3, l4, pyx

trX, teX, trY, teY = mnist(onehot=True)
trX = trX.reshape(-1, 1, 28, 28)
teX = teX.reshape(-1, 1, 28, 28)

X = T.ftensor4()
Y = T.fmatrix()

w = init_weights((32, 1, 3, 3))
w2 = init_weights((64, 32, 3, 3))
w3 = init_weights((128, 64, 3, 3))
w4 = init_weights((128 * 3 * 3, 625))
w_o = init_weights((625, 10))

noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, 0.2, 0.5)
l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, 0., 0.)
        return lr_min
    else:
        return lr

def init_neighborhood_size(map_shape):
    m, n = map_shape
    sigma_0 = np.sqrt(m**2 + n**2) / 2.
    return sigma_0

def init_timeconstant(n_epochs_organizing_phase, sigma_0):
    return float(n_epochs_organizing_phase) / np.log(sigma_0)

trX, teX, trY, teY = mnist(ntrain=60000, ntest=10000, onehot=False)
xmin_val = trX[0].min()
xmax_val = trX[0].max()

def remove_threes_and_fours(X, Y):
    """
    Y: array-like, shape (n_examples,)
    """
    three_idxs = np.where(Y == 3)
    four_idxs = np.where(Y == 4)
    ia = np.indices(Y.shape)
    remaining_idxs = np.setxor1d(ia, np.concatenate((three_idxs[0], four_idxs[0])))
    return X[remaining_idxs], Y[remaining_idxs]

if raw_input('remove_classes 3 and 4? (y/n)') == 'y':