def grid_search():
    """Grid-search over lambda (learning rate) for SGD CRF training.

    Trains on the first 100 shuffled examples for each candidate
    learning rate and appends the validation score (second-to-last
    entry of ``scores``) to ``gridsearch_results.txt``, one line per
    lambda.  Side effect: writes/overwrites that file.
    """
    lambdas = [1e2, 1e3, 1e4, 1e5]

    # Load the corpus and shuffle it into a 50/50 train/validation split.
    train_labels_og = "./dataset/trainingLabels.txt"
    train_sentences_og = "./dataset/trainingSentences.txt"
    (train_labels, train_sentences,
     validation_labels, validation_sentences) = sr.shuffle_examples(
        train_labels_og, train_sentences_og, pct_train=0.5
    )

    # Context manager guarantees the results file is closed (and partial
    # results flushed) even if a training run raises.
    with open("gridsearch_results.txt", "w") as gridfile:
        for lr in lambdas:
            print(lr)
            # BUG FIX: SGD_train lives in the SGD_CRF module (the file
            # only does `import SGD_CRF`), so the original unqualified
            # call raised NameError.  Qualify it, matching the driver
            # script below.
            weights, scores, epoch, ept_avg = SGD_CRF.SGD_train(
                train_labels[:100],
                train_sentences[:100],
                20,
                validation_labels[:100],
                validation_sentences[:100],
                "word",
                lr,
            )
            print(scores)
            # scores[-2] is taken as the validation score for this lr —
            # presumably the last validation-set entry; confirm against
            # SGD_train's return layout.
            gridfile.write(str(scores[-2]) + "\n")
import SGD_CRF
import subroutines as sr
import dataproc as dp
import numpy as np

# Load the labelled corpus and shuffle it into a 50/50
# train/validation split.
train_labels_og = './dataset/trainingLabels.txt'
train_sentences_og = './dataset/trainingSentences.txt'
train_labels, train_sentences, validation_labels, validation_sentences = sr.shuffle_examples(
    train_labels_og, train_sentences_og, pct_train=0.5)

# Run SGD CRF training for 20 epochs with word-level features.
weights, scores, epoch, ept_avg = SGD_CRF.SGD_train(
    train_labels, train_sentences, 20,
    validation_labels, validation_sentences, 'word')

# Persist the learned weights, the score history, and the epoch/timing
# info for later analysis ("0p5" = 50% train split).
for fname, payload in (('sgd_w_0p5.npy', weights),
                       ('sgd_s_0p5.npy', scores),
                       ('sgd_ep_0p5.npy', [epoch, ept_avg])):
    np.save(fname, np.array(payload))