def main():
    """Train on the spam data for several hyper-parameter pairs and plot each run.

    Loads ``spamData.mat``, passes the train and test features through
    ``preprocess.log_transform``, and then, for every
    (regularization weight, learning rate) pair in a fixed list:

    * calls ``batch`` to obtain ``beta``,
    * evaluates ``test_error`` on the training and the test split,
    * appends a tab-separated row (regularization weight, learning rate,
      training error, test error) to ``res.txt``,
    * shows a plot of the two lists handed to ``batch``.

    Returns None.
    """
    data = scipy.io.loadmat('spamData.mat')
    # Features are log-transformed before training; labels are used as loaded.
    train_x = preprocess.log_transform(data['Xtrain'])
    train_y = data['ytrain']
    test_x = preprocess.log_transform(data['Xtest'])
    test_y = data['ytest']

    # Passed straight to batch(); presumably its convergence tolerance -- confirm.
    threshold = 0.1

    # (regularization weight, learning rate) combinations, tried in this order.
    hyper_params = [
        (0.1, 0.0001),
        (0.001, 0.001),
        (0.001, 0.0001),
        (0.0001, 0.001),
        (0.0001, 0.0001),
    ]

    for regularization_weight, learning_rate in hyper_params:
        settings = (regularization_weight, learning_rate)
        print('Regularization_weight %s learning_rate %s' % settings)

        # batch() receives these two empty lists and is expected to fill them;
        # they are plotted below as iteration count vs. loss.
        iterations = []
        losses = []
        beta = batch(train_x, train_y, threshold, regularization_weight,
                     learning_rate, iterations, losses)

        train_error = test_error(train_x, train_y, beta)
        held_out_error = test_error(test_x, test_y, beta)

        # Append so that results from every pair (and earlier runs) accumulate.
        with open('res.txt', 'a') as results:
            results.write('%s\t%s\t%s\t%s\n'
                          % (settings + (train_error, held_out_error)))
            results.flush()

        pyplot.plot(iterations, losses)
        title = 'Training Loss vs Number of Iterations.\nregularization_weight %s learning_rate %s' % settings
        pyplot.title(title)
        pyplot.xlabel("Number of Iterations")
        pyplot.ylabel("Negative Log Likelihood")
        pyplot.show()
def main():
    """Run 5-fold cross-validation for one regularization weight.

    Usage: ``python crossval.py [regularization weight]``

    Loads ``spamData.mat``, log-transforms the training features and shuffles
    the training set once.  For each of the five folds it asks ``partition``
    for a held-out/training split, trains with ``batch``, and accumulates
    ``test_error`` on both parts.  The per-fold errors are averaged, printed,
    and appended to ``res<weight>.txt`` (``<weight>`` being the string form of
    the parsed float) as a tab-separated row: regularization weight, learning
    rate, mean training error, mean held-out error.

    Returns None.  Prints a usage message and returns early when the command
    line does not carry exactly one argument.

    Raises:
        ValueError: if the argument cannot be parsed as a float.
    """
    if len(sys.argv) != 2:
        print('Missing args. Usage: python crossval.py [regularization weight]')
        return
    regularization_weight = float(sys.argv[1])

    data = scipy.io.loadmat('spamData.mat')
    features = preprocess.log_transform(data['Xtrain'])
    labels = data['ytrain']
    shuffled_xtrain, shuffled_ytrain = shuffle(features, labels)

    # Passed straight to batch(); presumably its convergence tolerance -- confirm.
    threshold = 0.0001
    learning_rate = 0.0001
    # NOTE(review): partition() is not told the fold count, so it presumably
    # assumes 5 folds as well -- confirm before changing this value.
    num_folds = 5

    print('Regularization_weight %s learning_rate %s'
          % (regularization_weight, learning_rate))

    train_total = 0
    test_total = 0
    for fold in range(num_folds):
        fold_xtest, fold_ytest, fold_xtrain, fold_ytrain = partition(
            shuffled_xtrain, shuffled_ytrain, fold)
        # batch() returns three values here; only the first (beta) is needed.
        beta, _, _ = batch(fold_xtrain, fold_ytrain, threshold,
                           regularization_weight, learning_rate)
        train_total += test_error(fold_xtrain, fold_ytrain, beta)
        test_total += test_error(fold_xtest, fold_ytest, beta)

    # Divide by a float: under Python 2, `/` between integers floors, which
    # would truncate the averages if test_error() returns integer values.
    train = train_total / float(num_folds)
    test = test_total / float(num_folds)

    row = (regularization_weight, learning_rate, train, test)
    print('%s\t%s\t%s\t%s' % row)
    # Append so repeated runs with the same weight accumulate in one file;
    # leaving the `with` block flushes and closes the file.
    with open('res%s.txt' % regularization_weight, 'a') as f:
        f.write('%s\t%s\t%s\t%s\n' % row)