def Run_Q3_3_2_sp(): print '*********************************************************************************' print "//Question-3.3.2 (Simple Perceptron-single pass) Report on Adult Data -----------------------------" print '*********************************************************************************' [XData, YData, FSize, SeenWt] = func.parseInfo(trainFileHandle) maxVecLen = max(FSize) margin = 0 for i in range(1, 2): for j in LRateList: [Bias, Wvec, LearningMistake] = func.Perceptron(j, 1, XData, YData, maxVecLen, i, margin, 0, SeenWt) mistakeCount = func.TestPerceptron(XData, YData, Wvec, Bias, maxVecLen, margin) TrainAccuracy = ( (float(len(XData) - mistakeCount)) / len(XData)) * 100 print 'Using Learning Rate = ', j, ' number of epochs = Sinlge Pass, Initialization = random with mean 0 and sd 0.01' # print '\t Learned Weight Vector = ', Wvec print '\t Learned Bias = ', Bias print '\t Mistakes during Learning = ', LearningMistake print '\t Learning Accuracy = ', TrainAccuracy, '%\n' if (test_index != -1): [testXData, testYData] = func.parseInfoTest(testFilehandle, maxVecLen) testMistake = func.TestPerceptron(testXData, testYData, Wvec, Bias, maxVecLen, margin) TestAccuracy = ((float(len(testXData) - testMistake)) / len(testXData)) * 100 print '\t Test Accuracy = ', TestAccuracy, '%\n' print "//End of Question-3.3.2(simple Perceptron-single pass) Report ------------------------------------\n\n\n"
def Run_Q3_grad_with_Shuffle(): print '*********************************************************************************' print "//Question-3: For Grads (Aggressive Perceptron-batch 3,4,5) With Shuffle Report on Adult Data Set -------------" print '*********************************************************************************' [XData, YData, FSize, SeenWt] = func.parseInfo(trainFileHandle) maxVecLen = max(FSize) for i in range(3, 6): for k in MarginList: [Bias, Wvec, LearningMistake ] = func.AggressivePerceptron(1, XData, YData, maxVecLen, i, k, 1, SeenWt) mistakeCount = func.TestMarginPerceptron(XData, YData, Wvec, Bias, maxVecLen, k) TrainAccuracy = ( (float(len(XData) - mistakeCount)) / len(XData)) * 100 print 'Using Dynamic Learning rate, number of epochs = ', i, ', Margin = ', k, ', Initialization = random with mean 0 and sd 0.01, with Shuffle' #print '\t Learned Weight Vector = ', Wvec print '\t Learned Bias = ', Bias print '\t Mistakes during Learning = ', LearningMistake print '\t Learning Accuracy = ', TrainAccuracy, '%\n' if (test_index != -1): [testXData, testYData] = func.parseInfoTest(testFilehandle, maxVecLen) testMistake = func.TestMarginPerceptron( testXData, testYData, Wvec, Bias, maxVecLen, k) TestAccuracy = ((float(len(testXData) - testMistake)) / len(testXData)) * 100 print '\t Test Accuracy = ', TestAccuracy, '%\n' print "//End of Question-3: For Grads(Aggressive Perceptron-Batch Mode-Shuffle Report -----------------------------\n\n\n"
def Run_Q3_3_3_mp_batch_noShuffle(): print '***************************************************************************************************' print "//Question-3.3.2 (Margin Perceptron-batch 3,4,5) No Shuffle Report on Adult Data -----------------------------" print '***************************************************************************************************' [XData, YData, FSize, SeenWt] = func.parseInfo(trainFileHandle) maxVecLen = max(FSize) for i in range(3, 6): for j in LRateList: for k in MarginList: [Bias, Wvec, LearningMistake] = func.Perceptron(j, 1, XData, YData, maxVecLen, i, k, 0, SeenWt) mistakeCount = func.TestPerceptron(XData, YData, Wvec, Bias, maxVecLen, k) TrainAccuracy = ( (float(len(XData) - mistakeCount)) / len(XData)) * 100 print 'Using Learning Rate = ', j, ', number of epochs = ', i, ' Margin = ', k, ', Initialization = random with mean 0 and sd 0.01' # print '\t Learned Weight Vector = ', Wvec print '\t Learned Bias = ', Bias print '\t Mistakes during Learning = ', LearningMistake print '\t Learning Accuracy = ', TrainAccuracy, '%\n' if (test_index != -1): [testXData, testYData] = func.parseInfoTest(testFilehandle, maxVecLen) testMistake = func.TestPerceptron(testXData, testYData, Wvec, Bias, maxVecLen, k) TestAccuracy = ((float(len(testXData) - testMistake)) / len(testXData)) * 100 print '\t Test Accuracy = ', TestAccuracy, '%\n' print "//End of Question-3.3.2(Margin Perceptron-Batch Mode-NoShuffle) Report ------------------------------------\n\n\n"
def Run_Q1(): [XData, YData, FSize] = func.parseInfo(trainFileHandle) print "YData: ", len(YData), len(XData), FSize sigmaSq = 1000 lr = 0.1 [WVec, LearningMistake, lr, neglogdata] = func.LogReg(XData, YData, FSize, sigmaSq, lr, 100, 1) #[WVec, LearningMistake,lr] = func.LogReg(XData, YData, FSize, sigmaSq, lr, 20,0) FinalTrainAcc = 100 * float(len(XData) - LearningMistake) / len(XData) print "Final Training Accuracy = ", FinalTrainAcc, " %" if (test_index != -1): [testXData, testYData] = func.parseInfoTest(testFileHandle, FSize) print "From Test:", len(testXData), len(testYData) TestMistakes = func.LogRegTest(WVec, testXData, testYData) FinalTestAcc = 100 * float(len(testXData) - TestMistakes) / len(testXData) print "Final Test Accuracy = ", FinalTestAcc, " %" #print WVec ##------- Negative log-likelihood Plot --------------- xplot = [neglogdata[i][0] for i in range(0, len(neglogdata))] yplot = [neglogdata[i][1] for i in range(0, len(neglogdata))] plt.plot(xplot, yplot, 'bs') plt.ylabel('Negative Log Likelihood') plt.xlabel('Epochs') plt.show()
def Run_kvalidate(): [XData, YData, FSize] = func.parseInfo(trainFileHandle) bestSigmaSq = 0 bestlr = 0 MaxAccuracy = 0 epochs = 20 #lr = 0.00001 kfoldData = [] print "\n################################################################" print " Starting 5 Fold Cross Validation " print "################################################################\n" for lr0 in lrlist: for sigmaSq in SigmaSqList: blockSize = len(XData) / foldValue testAcc = 0 trainAcc = 0 for k in range(0, foldValue): #print "SigmaSq = ",sigmaSq, ", LR0 = ",lr0 KXTest = XData[k * blockSize:(k + 1) * blockSize] KYTest = YData[k * blockSize:(k + 1) * blockSize] KXData = [ XData[i] for i in range(0, len(XData)) if (i < k * blockSize or i >= (k + 1) * blockSize) ] KYData = [ YData[i] for i in range(0, len(XData)) if (i < k * blockSize or i >= (k + 1) * blockSize) ] [Wvec, trainMist, lr] = func.LogReg(KXData, KYData, FSize, sigmaSq, lr0, epochs, 0) testMist = func.LogRegTest(Wvec, KXTest, KYTest) trainAcc = trainAcc + 100 * float( (len(KXData) - trainMist)) / len(KXData) testAcc = testAcc + 100 * float( (len(KXTest) - testMist)) / len(KXTest) avgtrainacc = trainAcc / foldValue avgtestacc = testAcc / foldValue print "HyperParameter INFO:: %15s" % "SigmaSquare", " = %6s" % sigmaSq, ": %20s" % " LearningRate", " = %6s" % lr0, ": %10s" % " TrainAcc", " = %8s" % avgtrainacc, ": %10s" % " TestAcc", " = %8s " % avgtestacc kfoldData.append([sigmaSq, lr0, avgtrainacc, avgtestacc]) if (MaxAccuracy < avgtestacc): MaxAccuracy = avgtestacc bestSigmaSq = sigmaSq bestlr = lr0 print "HyperParameter INFO:: %15s" % "Best SigmaSq", " = %6s" % bestSigmaSq, ": %20s" % "BestLearningRate", " = %6s" % bestlr, "\n\n" print "\n\n******************************************************" print "HyperParameter Info:: Best Found SigmaSquare = %8s" % bestSigmaSq, ": Best Initial Learning Rate = %8s" % bestlr print "******************************************************\n\n" ##----------- Cross Validation Report ------------------------ # for i in kfoldData: # print "SigmaSquare = ", i[0], "LearningRate = ", i[1], ", TrainAcc = ",i[2], ", TestAcc = ",i[3] ##----------- Now learn on the entire data set --------------------## print "Starting Final Training (On 80 Epochs)..................." [Wvec, trainMist, lr, neglogdata] = func.LogReg(XData, YData, FSize, bestSigmaSq, bestlr, 80, 1) print "Training Completed ......................." FinalTrainAcc = 100 * float(len(XData) - trainMist) / len(XData) print "Final Training Accuracy = %8s" % FinalTrainAcc, "%\n\n" #print "Train Data size = ", len(XData) if (test_index != -1): print "Initiating Final Testing ................." [testXData, testYData] = func.parseInfoTest(testFileHandle, FSize) TestMistakes = func.LogRegTest(Wvec, testXData, testYData) #print "Test Data size = ", len(testXData) print "Final Testing Completed................." FinalTestAcc = 100 * float(len(testXData) - TestMistakes) / len(testXData) print "Final Test Accuracy = %8s" % FinalTestAcc, "%\n\n" ##------- Negative log-likelihood Plot --------------- print "\n\n###########################################" print "Plotting the Negative Log Likelihood" print "###########################################" xplot = [neglogdata[i][0] for i in range(0, len(neglogdata))] yplot = [neglogdata[i][1] for i in range(0, len(neglogdata))] plt.plot(xplot, yplot) plt.ylabel('Negative Log Likelihood') plt.xlabel('Epochs') plt.show()