def train_autoEncoder(trainFile, testFile, recordParserCallback, n_visible,
                      n_hidden, learning_rates, callback, corruption_levels,
                      activation, training_epochs, legend, normalize,
                      generate_attr_vec_callback, total, layer_no, params=None,
                      modulo=1000, useMomentum=False, momentumRate=0.90):
    """Build an ``SdA`` model, optionally pre-train it, then test it.

    NOTE(review): another function named ``train_autoEncoder`` (taking
    in-memory data instead of files) is defined later in this file; if both
    live at module level, the later definition replaces this one.

    Args:
        trainFile: Not referenced in the body.
        testFile: Forwarded to ``test``.
        recordParserCallback: Forwarded to ``train`` and ``test``.
        n_visible: Passed to ``SdA`` as ``n_ins``.
        n_hidden: Per-layer hidden sizes, passed to ``SdA`` as
            ``hidden_layers_sizes``; its length is the layer count.
        learning_rates: One entry per element of ``n_hidden``.
        callback: Called last as
            ``callback(sda, errorVectors, labels, legend)``.
        corruption_levels: One entry per element of ``n_hidden``.
        activation: Forwarded to ``SdA``.
        training_epochs: Forwarded to ``train``.
        legend: List that receives an ``"SAE <n> layers"`` entry.
        normalize: Not referenced in the body.
        generate_attr_vec_callback: Forwarded to ``train`` and ``test``.
        total: Not referenced in the body.
        layer_no: Not referenced in the body.
        params: Optional mapping with ``'W'`` and ``'b'`` entries. ``'W'`` is
            handed to ``SdA``; ``'b'`` only when it has the same length as
            ``'W'``. When ``params`` is truthy the training step is skipped.
        modulo: Forwarded to ``train``.
        useMomentum: Forwarded to ``SdA``.
        momentumRate: Forwarded to ``train``.

    Raises:
        Exception: If ``corruption_levels`` or ``learning_rates`` does not
            have exactly one entry per hidden layer.
    """
    # Both per-layer settings must line up with the hidden layers.
    per_layer_checks = (
        (corruption_levels,
         "corruption level not provided for each layer...will use default"),
        (learning_rates,
         "learning rates not provided for each layer...will use default"),
    )
    for per_layer_values, complaint in per_layer_checks:
        if len(per_layer_values) != len(n_hidden):
            raise Exception(complaint)

    legend.append("SAE %d layers" % len(n_hidden))

    # Symbolic inputs: (sample, label) feed the compiled functions,
    # (x, y) are the model's own input/label variables.
    sample = sparse.csc_matrix(name='s', dtype='float32')
    label = sparse.csc_matrix(name='l', dtype='float32')
    x = sparse.csc_matrix(name='x', dtype='float32')
    y = sparse.csc_matrix(name='y', dtype='float32')

    # Symbolic hyper-parameters supplied at call time.
    corruption_level = T.scalar('corruption')
    learning_rate = T.scalar('lr')
    momentum_rate = T.scalar('momentum')

    # Fixed seed so repeated runs draw the same random numbers.
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # Reuse previously learned parameters when the caller supplies them.
    W = params['W'] if params is not None else None
    b = None
    if params is not None and len(params['b']) == len(params['W']):
        b = params['b']

    sda = SdA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        n_ins=n_visible,
        hidden_layers_sizes=n_hidden,
        input=x,
        label=y,
        activation=activation,
        W=W,
        b=b,
        useMomentum=useMomentum,
    )
    pretraining_fns = sda.pretraining_functions(
        sample, label, learning_rate, corruption_level, momentum_rate)

    # Train only when no (non-empty) parameters were handed in.
    if not params:
        train(recordParserCallback, generate_attr_vec_callback,
              pretraining_fns, learning_rates, corruption_levels,
              momentumRate, training_epochs, batch_size=1, modulo=modulo)

    # A parenthesised single-argument print behaves the same whether print
    # is a statement or a function.
    print("about to test...")
    test(testFile, recordParserCallback, generate_attr_vec_callback, sda)

    # look at individual errors:
    # NOTE(review): `errorVectors` and `labels` are never assigned in this
    # function; unless they exist at module level this call raises NameError.
    callback(sda, errorVectors, labels, legend)
def train_autoEncoder(trainData, testData, trainDataLabel, testDataLabel,
                      n_visible, n_hidden, learning_rates, callback,
                      corruption_levels, activation, training_epochs, legend,
                      normalize, generate_attr_vec_callback, total, layer_no,
                      params=None):
    """Pre-train an ``SdA`` on in-memory data and return hidden encodings.

    NOTE(review): a file-based function with the same name is defined earlier
    in this file; if both live at module level, this definition replaces it.

    Args:
        trainData: Iterable of training records; also passed whole to
            ``generate_feature``.
        testData: Test records, passed to ``generate_feature``.
        trainDataLabel: Forwarded to ``callback``.
        testDataLabel: Forwarded to ``plot_transformed_vectors`` and
            ``callback``.
        n_visible: Passed to ``SdA`` as ``n_ins``.
        n_hidden: Per-layer hidden sizes, passed to ``SdA`` as
            ``hidden_layers_sizes``.
        learning_rates: One entry per element of ``n_hidden``.
        callback: Called with the model, both data sets, both label sets,
            ``generate_attr_vec_callback``, ``legend`` and ``total``.
        corruption_levels: One entry per element of ``n_hidden``.
        activation: Forwarded to ``SdA``.
        training_epochs: Number of passes over ``trainData`` per layer.
        legend: List that receives an ``"SAE <n> layers"`` entry.
        normalize: When truthy, each data set is min-max scaled using its
            own minimum and maximum.
        generate_attr_vec_callback: Forwarded to ``generate_feature`` and
            ``callback``.
        total: Forwarded to ``generate_feature`` and ``callback``.
        layer_no: Not referenced in the body.
        params: Optional mapping with ``'W'`` and ``'b'`` entries. ``'W'`` is
            handed to ``SdA``; ``'b'`` only when it has the same length as
            ``'W'``.

    Returns:
        A 2-tuple: the evaluated hidden values for the training matrix and
        for the test matrix.

    Raises:
        Exception: If ``corruption_levels`` or ``learning_rates`` does not
            have exactly one entry per hidden layer.
    """
    # Both per-layer settings must line up with the hidden layers.
    per_layer_checks = (
        (corruption_levels,
         "corruption level not provided for each layer...will use default"),
        (learning_rates,
         "learning rates not provided for each layer...will use default"),
    )
    for per_layer_values, complaint in per_layer_checks:
        if len(per_layer_values) != len(n_hidden):
            raise Exception(complaint)

    legend.append("SAE %d layers" % len(n_hidden))

    # Symbolic inputs: (sample, label) feed the compiled functions,
    # (x, y) are the model's own input/label variables.
    sample = sparse.csc_matrix(name='s', dtype='float32')
    label = sparse.csc_matrix(name='l', dtype='float32')
    x = sparse.csc_matrix(name='x', dtype='float32')
    y = sparse.csc_matrix(name='y', dtype='float32')

    # Symbolic hyper-parameters supplied at call time.
    corruption_level = T.scalar('corruption')
    learning_rate = T.scalar('lr')

    # Fixed seed so repeated runs draw the same random numbers.
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # Reuse previously learned parameters when the caller supplies them.
    W = params['W'] if params is not None else None
    b = None
    if params is not None and len(params['b']) == len(params['W']):
        b = params['b']

    sda = SdA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        n_ins=n_visible,
        hidden_layers_sizes=n_hidden,
        input=x,
        label=y,
        activation=activation,
        W=W,
        b=b,
    )
    pretraining_fns = sda.pretraining_functions(
        sample, label, learning_rate, corruption_level)

    if normalize:
        def normalize_data(data):
            """Min-max scale ``data`` using its own global extremes."""
            values = numpy.array(data)
            lowest = values.flatten().min()
            highest = values.flatten().max()
            return (values - float(lowest)) / (float(highest - lowest))

        trainData = normalize_data(trainData)
        testData = normalize_data(testData)

    # Layer-wise pre-training, one record at a time; each record serves as
    # both the input and the reconstruction target.
    for layer_idx, pretrain_layer in enumerate(pretraining_fns):
        print("training layer #%s" % str(layer_idx))
        layer_corruption = corruption_levels[layer_idx]
        layer_rate = learning_rates[layer_idx]
        for _epoch in range(training_epochs):
            for record in trainData:
                sampleMatrix = generate_feature(
                    [record], total, generate_attr_vec_callback)
                error = pretrain_layer(
                    sampleMatrix, sampleMatrix, layer_corruption, layer_rate)
            # Cost of the last record processed in this epoch.
            print("error train cost: " + str(error))

    # Reconstruction errors over the full training and test sets.
    sampleMatrix = generate_feature(
        trainData, total, generate_attr_vec_callback)
    errorVector = sda.get_reconstruction_errors(sampleMatrix.tocsc())
    testMatrix = generate_feature(testData, total, generate_attr_vec_callback)
    errorVectorTest = sda.get_reconstruction_errors(testMatrix.tocsc())

    # Plot the raw test vectors, i.e. before any learned transformation.
    plot_transformed_vectors(
        testMatrix.toarray(), testDataLabel, title="before feature learning")

    def find_avg_error(errorMatrix):
        """Return each row's squared norm (diagonal of E times E-transposed)."""
        gram = numpy.dot(errorMatrix, numpy.transpose(errorMatrix))
        return numpy.diag(gram)

    train_norm = math.sqrt(sum(find_avg_error(errorVector)))
    print("error train: " + str(train_norm))
    test_norm = math.sqrt(sum(find_avg_error(errorVectorTest)))
    print("error test: " + str(test_norm))

    # look at individual errors:
    callback(sda, trainData, trainDataLabel, testData, testDataLabel,
             generate_attr_vec_callback, legend, total)

    transformSample = sda.get_hidden_values(sampleMatrix.tocsc())
    transformTest = sda.get_hidden_values(testMatrix.tocsc())
    return transformSample.eval(), transformTest.eval()
def plot_learning_curve(trainingFile, testFile, recordParserCallback,
                        generate_attr_vec_callback, corruption_levels,
                        learning_rates, momentumRate, training_epochs,
                        n_visible, n_hidden, activation, useMomentum=False,
                        batch_size=1000, modulo=1000, params=None):
    """Plot training and test cost for four growing batch sizes.

    An ``SdA`` model is built once. For ``iteration`` in 1..4 the model is
    trained with ``batch_size=iteration * batch_size``, then tested; both
    costs are plotted against ``iteration`` and the figure is shown.

    The body used to read ``params`` although it was neither a parameter nor
    a local, which raises NameError unless a module-level ``params`` exists.
    It is now an optional trailing keyword argument, so existing callers are
    unaffected.

    Args:
        trainingFile: Not referenced in the body.
        testFile: Forwarded to ``test``.
        recordParserCallback: Forwarded to ``train`` and ``test``.
        generate_attr_vec_callback: Forwarded to ``train`` and ``test``.
        corruption_levels: Forwarded to ``train``.
        learning_rates: Forwarded to ``train``.
        momentumRate: Forwarded to ``train``.
        training_epochs: Forwarded to ``train``.
        n_visible: Passed to ``SdA`` as ``n_ins``.
        n_hidden: Passed to ``SdA`` as ``hidden_layers_sizes``.
        activation: Forwarded to ``SdA``.
        useMomentum: Forwarded to ``SdA``.
        batch_size: Base batch size, multiplied by the iteration number.
        modulo: Forwarded to ``train``.
        params: Optional mapping with ``'W'`` and ``'b'`` entries. ``'W'`` is
            handed to ``SdA``; ``'b'`` only when it has the same length as
            ``'W'``.
    """
    # Symbolic inputs: (sample, label) feed the compiled functions,
    # (x, y) are the model's own input/label variables.
    sample = sparse.csc_matrix(name='s', dtype='float32')
    label = sparse.csc_matrix(name='l', dtype='float32')
    x = sparse.csc_matrix(name='x', dtype='float32')
    y = sparse.csc_matrix(name='y', dtype='float32')

    corruption_level = T.scalar('corruption')  # % of corruption to use
    learning_rate = T.scalar('lr')  # learning rate to use
    momentum_rate = T.scalar('momentum')  # momentum rate to use

    # Fixed seed so repeated runs draw the same random numbers.
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # Reuse previously learned parameters when the caller supplies them.
    W = None
    b = None
    if params is not None:
        W = params['W']
        if len(params['b']) == len(params['W']):
            b = params['b']

    sda = SdA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        n_ins=n_visible,
        hidden_layers_sizes=n_hidden,
        input=x,
        label=y,
        activation=activation,
        W=W,
        b=b,
        useMomentum=useMomentum,
    )
    pretraining_fns = sda.pretraining_functions(
        sample, label, learning_rate, corruption_level, momentum_rate)

    pl.clf()

    def plotCurve(xs, ys, curve_label):
        """Draw one labelled curve and refresh the legend."""
        pl.plot(xs, ys, label=curve_label)
        pl.legend(loc="lower right")

    # Separate names for the results, so the symbolic x / y stay untouched.
    iterations = []
    train_costs = []
    test_costs = []

    # The same model and compiled functions are reused on every iteration.
    # NOTE(review): whether `train` restarts from fresh weights each time is
    # not visible here - confirm before reading this as a learning curve.
    for iteration in range(1, 5):
        cost = train(recordParserCallback, generate_attr_vec_callback,
                     pretraining_fns, learning_rates, corruption_levels,
                     momentumRate, training_epochs,
                     batch_size=iteration * batch_size, modulo=modulo,
                     stopping_fn=stop_after_mini_batch)
        iterations.append(iteration)
        train_costs.append(cost)

        # A parenthesised single-argument print behaves the same whether
        # print is a statement or a function.
        print("about to test...")
        testCost = test(testFile, recordParserCallback,
                        generate_attr_vec_callback, sda)
        test_costs.append(testCost)

    plotCurve(iterations, train_costs, "train")
    plotCurve(iterations, test_costs, "test")
    pl.show()