def getSentiment():
    dataSet = StanfordSentiment()
    tokens = dataSet.tokens()

    # Load the word vectors we trained earlier
    _, wordVectors, _ = load_saved_params()
    dimVectors = wordVectors.shape[1]

    # Load the test sentences and featurize them
    sentences = dataSet.getTestSentences()
    nSentences = len(sentences)
    sentenceFeatures = np.zeros((nSentences, dimVectors))
    sentenceLabels = np.zeros((nSentences,), dtype=np.int32)
    for i in range(nSentences):
        words, sentenceLabels[i] = sentences[i]
        sentenceFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

    # Train softmax-regression weights: one column per sentiment class
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    regularization = 0.00001
    weights = sgd(lambda weights: softmax_wrapper(
        sentenceFeatures, sentenceLabels, weights, regularization),
        weights, 3.0, 10, PRINT_EVERY=10)

    # Class probabilities and hard predictions on the test sentences
    prob = softmax(sentenceFeatures.dot(weights))
    _, _, pred = softmaxRegression(sentenceFeatures, sentenceLabels, weights)
    for label in pred:
        print(categorify(label))
    print(prob)
    print(pred)
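# getSentenceFeature is not defined in this file. A minimal sketch of what it
# is assumed to do here -- average the word vectors of a sentence's tokens --
# follows; the real helper may treat out-of-vocabulary words differently.
import numpy as np

def getSentenceFeature(tokens, wordVectors, words):
    """Average the vectors of all words in the sentence (sketch)."""
    feature = np.zeros((wordVectors.shape[1],))
    for word in words:
        feature += wordVectors[tokens[word], :]
    return feature / len(words)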
print "Training with 2 layer neural net!" dimensions = [dimVectors, 50, 5] weights = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( dimensions[1] + 1) * dimensions[2], ) tr_N = trainLabels.shape[0] tr_labels = np.zeros((tr_N, dimensions[2])) for i in xrange(tr_N): tr_labels[i, trainLabels[i]] = 1 dv_N = devLabels.shape[0] dv_labels = np.zeros((dv_N, dimensions[2])) for i in xrange(dv_N): dv_labels[i, devLabels[i]] = 1 weights = sgd(lambda weights: neural_wrapper(trainFeatures, tr_labels, weights, dimensions), weights, 3.0, ITER, PRINT_EVERY=100) _, _, pred = forward_backward_prop(trainFeatures, tr_labels, weights, dimensions) trainAccuracy = accuracy(trainLabels, pred) print "Train accuracy (%%): %f" % trainAccuracy _, _, pred = forward_backward_prop(devFeatures, dv_labels, weights, dimensions) devAccuracy = accuracy(devLabels, pred) print "Dev accuracy (%%): %f" % devAccuracy ########################################### # Save the results and weights results.append({ "reg" : regularization, "weights" : weights,
devFeatures = np.zeros((nDev, dimVectors))
devLabels = np.zeros((nDev,), dtype=np.int32)
for i in range(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print("Training for reg=%f" % regularization)

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                                  weights, regularization),
                  weights, 3.0, 10000, PRINT_EVERY=100)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print("Train accuracy (%%): %f" % trainAccuracy)

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print("Dev accuracy (%%): %f" % devAccuracy)

    # Save the results and weights
    results.append({
        "reg": regularization,
        "weights": weights,
    })
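# softmax_wrapper is referenced but not shown. Judging from the variant
# further down that passes softmaxRegression with nopredictions=True straight
# to sgd, it is assumed to be a thin adapter returning only (cost, gradient):
def softmax_wrapper(features, labels, weights, regularization):
    cost, grad = softmaxRegression(features, labels, weights,
                                   regularization, nopredictions=True)
    return cost, grad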
# Context size
C = 5

# Reset the random seed to make sure that everyone gets the same results
random.seed(31415)
np.random.seed(9265)

startTime = time.time()
wordVectors = np.concatenate(
    ((np.random.rand(nWords, dimVectors) - 0.5) / dimVectors,
     np.zeros((nWords, dimVectors))),
    axis=0)
wordVectors = sgd(lambda vec: word2vec_sgd_wrapper(
    skipgram, tokens, vec, dataset, C, negSamplingCostAndGradient),
    wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10)
# Note that normalization is not called here. This is not a bug:
# normalizing during training loses the notion of length.
print("sanity check: cost at convergence should be around or below 10")
print("training took %d seconds" % (time.time() - startTime))

# Concatenate the input and output word vectors
wordVectors = np.concatenate(
    (wordVectors[:nWords, :], wordVectors[nWords:, :]),
    axis=0)
# wordVectors = wordVectors[:nWords, :] + wordVectors[nWords:, :]

visualizeWords = [
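# From the calls in this file, the scaffold's sgd is assumed to have the
# signature sgd(f, x0, step, iterations, postprocessing=None, useSaved=False,
# PRINT_EVERY=10), where f maps a parameter vector to (cost, gradient). A
# minimal sketch of the loop (without the checkpointing useSaved enables);
# the ANNEAL_EVERY schedule is an assumption:
def sgd_sketch(f, x0, step, iterations, postprocessing=None, PRINT_EVERY=10):
    ANNEAL_EVERY = 20000  # assumed: halve the step size periodically
    x = x0
    for it in range(1, iterations + 1):
        cost, grad = f(x)
        x = x - step * grad          # vanilla gradient step
        if postprocessing is not None:
            x = postprocessing(x)
        if PRINT_EVERY is not None and it % PRINT_EVERY == 0:
            print("iter %d: %f" % (it, cost))
        if it % ANNEAL_EVERY == 0:
            step *= 0.5
    return x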
def test_sgd_3(quad):
    t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=None)
    assert abs(t3) <= 1e-6
def test_sgd_2(quad):
    t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=None)
    assert abs(t2) <= 1e-6
def test_sgd_1(quad):
    """ Original normalization test defined in q3_word2vec.py """
    t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=None)
    assert abs(t1) <= 1e-6
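# The quad argument these tests receive is not shown; it is presumably a
# pytest fixture wrapping the assignment's quadratic sanity check
# f(x) = sum(x**2) with gradient 2x, so sgd should drive any starting point
# toward 0. A sketch under that assumption:
import numpy as np
import pytest

@pytest.fixture
def quad():
    # Returns (cost, gradient) for the quadratic bowl centered at 0
    return lambda x: (np.sum(x ** 2), 2 * x)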
def your_sanity_checks():
    """
    Use this space to add any additional sanity checks by running:
        python q2_neural.py
    This function will not be called by the autograder, nor will
    your additional tests be graded.
    """
    print("Running your sanity checks...")
    ### YOUR CODE HERE
    PATH = os.getcwd()
    trainFile = os.path.join(PATH, "optdigits_train.txt")
    testFile = os.path.join(PATH, "optdigits_test.txt")

    # Read the comma-separated digit data and one-hot encode the labels
    with open(trainFile, 'r') as train_data:
        data = [line.split(',') for line in train_data.readlines()]
    for i in range(len(data)):
        for j in range(len(data[i])):
            data[i][j] = int(data[i][j])
    trainX = np.asarray(data)[:, :-1]
    indexY = np.transpose(np.asarray(data)[:, -1:])[0]
    shp = np.arange(indexY.shape[0])
    trainY = np.zeros((trainX.shape[0], 10))
    trainY[shp, indexY] = 1

    with open(testFile, 'r') as test_data:
        data = [line.split(',') for line in test_data.readlines()]
    for i in range(len(data)):
        for j in range(len(data[i])):
            data[i][j] = int(data[i][j])
    testX = np.asarray(data)[:, :-1]
    indexY = np.transpose(np.asarray(data)[:, -1:])[0]
    shp = np.arange(indexY.shape[0])
    testY = np.zeros((testX.shape[0], 10))
    testY[shp, indexY] = 1

    # 64-dim inputs, one 128-unit hidden layer, 10 output classes
    dimensions = [64, 128, 10]
    siz = sum((dimensions[i] + 1) * dimensions[i + 1]
              for i in range(len(dimensions) - 1))
    params = np.random.randn(siz)

    trainChart = []
    testChart = []
    rate = 0.001
    for tenEpoc in range(500):
        # Run 10 SGD steps at a time, then record train/test loss
        params = sgd(lambda p: forward_backward_prop(trainX, trainY, p,
                                                     dimensions),
                     params, rate, 10, PRINT_EVERY=100)
        trainChart.append(forward_test(trainX, trainY, params, dimensions))
        testChart.append(forward_test(testX, testY, params, dimensions))
        print("Train iter\t", 10 * tenEpoc, "\t:", trainChart[tenEpoc])
        print("Test iter\t", 10 * tenEpoc, "\t:", testChart[tenEpoc])
        if testChart[tenEpoc] < 130:
            rate = 0.0001  # decay the learning rate once test loss is low

    fig = plt.figure()
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.plot(np.arange(len(trainChart)), trainChart,
             np.arange(len(testChart)), testChart)
    plt.legend(['train', 'test'], loc='upper right')
    plt.show()
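# forward_test is not defined in this file. Presumably it runs only the
# forward pass of the network and returns the total cross-entropy loss; a
# sketch under that assumption (sigmoid hidden layer, softmax output, using
# the unpack_weights sketch above):
import numpy as np

def forward_test(X, Y, params, dimensions):
    W1, b1, W2, b2 = unpack_weights(params, dimensions)
    h = 1.0 / (1.0 + np.exp(-(X.dot(W1) + b1)))      # sigmoid hidden layer
    scores = h.dot(W2) + b2
    scores -= np.max(scores, axis=1, keepdims=True)  # numerically stable softmax
    probs = np.exp(scores) / np.sum(np.exp(scores), axis=1, keepdims=True)
    return -np.sum(Y * np.log(probs))                # total cross-entropy loss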
for i in range(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print("Training for reg=%f" % regularization)

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                                  weights, regularization),
                  weights, args.learning, args.steps, PRINT_EVERY=args.every)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print("Train accuracy (%%): %f" % trainAccuracy)

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print("Dev accuracy (%%): %f" % devAccuracy)

    # Save the results and weights
    results.append({
        "reg": regularization,
        "weights": weights,
    })
devFeatures = np.zeros((nDev, dimVectors))
devLabels = np.zeros((nDev,), dtype=np.int32)
for i in range(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print("Training for reg=%f" % regularization)

    # We will do batch optimization
    weights = sgd(lambda weights: softmaxRegression(trainFeatures, trainLabels,
                                                    weights, regularization,
                                                    nopredictions=True),
                  weights, 0.3, 200000, PRINT_EVERY=20000)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print("Train accuracy (%%): %f" % trainAccuracy)

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print("Dev accuracy (%%): %f" % devAccuracy)
    print("Prediction VS ||Weights||^2: %f - %f" %
          (predictionLoss(trainFeatures, trainLabels, weights),
           regularizationLoss(weights, 1.0)))

    # Save the results and weights
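# accuracy is another external helper. Given that its result is printed as a
# percentage above, it is assumed to be simply the fraction of predictions
# matching the gold labels, scaled to 100:
import numpy as np

def accuracy(y, yhat):
    """Percentage of predictions that match the gold labels (sketch)."""
    return np.sum(y == yhat) * 100.0 / y.size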