def Run_Q3_3_2_sp(): print '*********************************************************************************' print "//Question-3.3.2 (Simple Perceptron-single pass) Report on Adult Data -----------------------------" print '*********************************************************************************' [XData, YData, FSize, SeenWt] = func.parseInfo(trainFileHandle) maxVecLen = max(FSize) margin = 0 for i in range(1, 2): for j in LRateList: [Bias, Wvec, LearningMistake] = func.Perceptron(j, 1, XData, YData, maxVecLen, i, margin, 0, SeenWt) mistakeCount = func.TestPerceptron(XData, YData, Wvec, Bias, maxVecLen, margin) TrainAccuracy = ( (float(len(XData) - mistakeCount)) / len(XData)) * 100 print 'Using Learning Rate = ', j, ' number of epochs = Sinlge Pass, Initialization = random with mean 0 and sd 0.01' # print '\t Learned Weight Vector = ', Wvec print '\t Learned Bias = ', Bias print '\t Mistakes during Learning = ', LearningMistake print '\t Learning Accuracy = ', TrainAccuracy, '%\n' if (test_index != -1): [testXData, testYData] = func.parseInfoTest(testFilehandle, maxVecLen) testMistake = func.TestPerceptron(testXData, testYData, Wvec, Bias, maxVecLen, margin) TestAccuracy = ((float(len(testXData) - testMistake)) / len(testXData)) * 100 print '\t Test Accuracy = ', TestAccuracy, '%\n' print "//End of Question-3.3.2(simple Perceptron-single pass) Report ------------------------------------\n\n\n"
def Run_Q3_grad_with_Shuffle(): print '*********************************************************************************' print "//Question-3: For Grads (Aggressive Perceptron-batch 3,4,5) With Shuffle Report on Adult Data Set -------------" print '*********************************************************************************' [XData, YData, FSize, SeenWt] = func.parseInfo(trainFileHandle) maxVecLen = max(FSize) for i in range(3, 6): for k in MarginList: [Bias, Wvec, LearningMistake ] = func.AggressivePerceptron(1, XData, YData, maxVecLen, i, k, 1, SeenWt) mistakeCount = func.TestMarginPerceptron(XData, YData, Wvec, Bias, maxVecLen, k) TrainAccuracy = ( (float(len(XData) - mistakeCount)) / len(XData)) * 100 print 'Using Dynamic Learning rate, number of epochs = ', i, ', Margin = ', k, ', Initialization = random with mean 0 and sd 0.01, with Shuffle' #print '\t Learned Weight Vector = ', Wvec print '\t Learned Bias = ', Bias print '\t Mistakes during Learning = ', LearningMistake print '\t Learning Accuracy = ', TrainAccuracy, '%\n' if (test_index != -1): [testXData, testYData] = func.parseInfoTest(testFilehandle, maxVecLen) testMistake = func.TestMarginPerceptron( testXData, testYData, Wvec, Bias, maxVecLen, k) TestAccuracy = ((float(len(testXData) - testMistake)) / len(testXData)) * 100 print '\t Test Accuracy = ', TestAccuracy, '%\n' print "//End of Question-3: For Grads(Aggressive Perceptron-Batch Mode-Shuffle Report -----------------------------\n\n\n"
def Run_Q3_3_3_mp_batch_noShuffle(): print '***************************************************************************************************' print "//Question-3.3.2 (Margin Perceptron-batch 3,4,5) No Shuffle Report on Adult Data -----------------------------" print '***************************************************************************************************' [XData, YData, FSize, SeenWt] = func.parseInfo(trainFileHandle) maxVecLen = max(FSize) for i in range(3, 6): for j in LRateList: for k in MarginList: [Bias, Wvec, LearningMistake] = func.Perceptron(j, 1, XData, YData, maxVecLen, i, k, 0, SeenWt) mistakeCount = func.TestPerceptron(XData, YData, Wvec, Bias, maxVecLen, k) TrainAccuracy = ( (float(len(XData) - mistakeCount)) / len(XData)) * 100 print 'Using Learning Rate = ', j, ', number of epochs = ', i, ' Margin = ', k, ', Initialization = random with mean 0 and sd 0.01' # print '\t Learned Weight Vector = ', Wvec print '\t Learned Bias = ', Bias print '\t Mistakes during Learning = ', LearningMistake print '\t Learning Accuracy = ', TrainAccuracy, '%\n' if (test_index != -1): [testXData, testYData] = func.parseInfoTest(testFilehandle, maxVecLen) testMistake = func.TestPerceptron(testXData, testYData, Wvec, Bias, maxVecLen, k) TestAccuracy = ((float(len(testXData) - testMistake)) / len(testXData)) * 100 print '\t Test Accuracy = ', TestAccuracy, '%\n' print "//End of Question-3.3.2(Margin Perceptron-Batch Mode-NoShuffle) Report ------------------------------------\n\n\n"
def Run_Q1(): [XData, YData, FSize] = func.parseInfo(trainFileHandle) print "YData: ", len(YData), len(XData), FSize sigmaSq = 1000 lr = 0.1 [WVec, LearningMistake, lr, neglogdata] = func.LogReg(XData, YData, FSize, sigmaSq, lr, 100, 1) #[WVec, LearningMistake,lr] = func.LogReg(XData, YData, FSize, sigmaSq, lr, 20,0) FinalTrainAcc = 100 * float(len(XData) - LearningMistake) / len(XData) print "Final Training Accuracy = ", FinalTrainAcc, " %" if (test_index != -1): [testXData, testYData] = func.parseInfoTest(testFileHandle, FSize) print "From Test:", len(testXData), len(testYData) TestMistakes = func.LogRegTest(WVec, testXData, testYData) FinalTestAcc = 100 * float(len(testXData) - TestMistakes) / len(testXData) print "Final Test Accuracy = ", FinalTestAcc, " %" #print WVec ##------- Negative log-likelihood Plot --------------- xplot = [neglogdata[i][0] for i in range(0, len(neglogdata))] yplot = [neglogdata[i][1] for i in range(0, len(neglogdata))] plt.plot(xplot, yplot, 'bs') plt.ylabel('Negative Log Likelihood') plt.xlabel('Epochs') plt.show()
def Run_Q3_3_1(): print '*********************************************************************************' print "//Question-3.3.1 Report (Sanity check using Table2 -----------------------------" print '*********************************************************************************' [XData, YData, FSize, SeenWt] = func.parseInfo(sanityTable) maxVecLen = max(FSize) for i in range(1, 2): for j in LRateList: [Bias, Wvec, LearningMistake] = func.Perceptron(j, 0, XData, YData, maxVecLen, i, 0, 0, SeenWt) mistakeCount = func.TestPerceptron(XData, YData, Wvec, Bias, maxVecLen, 0) TrainAccuracy = (float(len(XData) - mistakeCount) / len(XData)) * 100 print 'Using Learning Rate = ', j, ' number of epochs = ', i, 'Initialization = @ default 0 , No-Shuffle' print '\t Learned Weight Vector = ', Wvec print '\t Learned Bias = ', Bias print '\t Mistakes during Learning = ', LearningMistake print '\t Learning Accuracy = ', TrainAccuracy, '%\n' print "//End of Question-3.3.1 Report -------------------------------------------------\n\n"
def Run_kvalidate(): [XData, YData, FSize] = func.parseInfo(trainFileHandle) bestSigmaSq = 0 bestlr = 0 MaxAccuracy = 0 epochs = 20 #lr = 0.00001 kfoldData = [] print "\n################################################################" print " Starting 5 Fold Cross Validation " print "################################################################\n" for lr0 in lrlist: for sigmaSq in SigmaSqList: blockSize = len(XData) / foldValue testAcc = 0 trainAcc = 0 for k in range(0, foldValue): #print "SigmaSq = ",sigmaSq, ", LR0 = ",lr0 KXTest = XData[k * blockSize:(k + 1) * blockSize] KYTest = YData[k * blockSize:(k + 1) * blockSize] KXData = [ XData[i] for i in range(0, len(XData)) if (i < k * blockSize or i >= (k + 1) * blockSize) ] KYData = [ YData[i] for i in range(0, len(XData)) if (i < k * blockSize or i >= (k + 1) * blockSize) ] [Wvec, trainMist, lr] = func.LogReg(KXData, KYData, FSize, sigmaSq, lr0, epochs, 0) testMist = func.LogRegTest(Wvec, KXTest, KYTest) trainAcc = trainAcc + 100 * float( (len(KXData) - trainMist)) / len(KXData) testAcc = testAcc + 100 * float( (len(KXTest) - testMist)) / len(KXTest) avgtrainacc = trainAcc / foldValue avgtestacc = testAcc / foldValue print "HyperParameter INFO:: %15s" % "SigmaSquare", " = %6s" % sigmaSq, ": %20s" % " LearningRate", " = %6s" % lr0, ": %10s" % " TrainAcc", " = %8s" % avgtrainacc, ": %10s" % " TestAcc", " = %8s " % avgtestacc kfoldData.append([sigmaSq, lr0, avgtrainacc, avgtestacc]) if (MaxAccuracy < avgtestacc): MaxAccuracy = avgtestacc bestSigmaSq = sigmaSq bestlr = lr0 print "HyperParameter INFO:: %15s" % "Best SigmaSq", " = %6s" % bestSigmaSq, ": %20s" % "BestLearningRate", " = %6s" % bestlr, "\n\n" print "\n\n******************************************************" print "HyperParameter Info:: Best Found SigmaSquare = %8s" % bestSigmaSq, ": Best Initial Learning Rate = %8s" % bestlr print "******************************************************\n\n" ##----------- Cross Validation Report 
------------------------ # for i in kfoldData: # print "SigmaSquare = ", i[0], "LearningRate = ", i[1], ", TrainAcc = ",i[2], ", TestAcc = ",i[3] ##----------- Now learn on the entire data set --------------------## print "Starting Final Training (On 80 Epochs)..................." [Wvec, trainMist, lr, neglogdata] = func.LogReg(XData, YData, FSize, bestSigmaSq, bestlr, 80, 1) print "Training Completed ......................." FinalTrainAcc = 100 * float(len(XData) - trainMist) / len(XData) print "Final Training Accuracy = %8s" % FinalTrainAcc, "%\n\n" #print "Train Data size = ", len(XData) if (test_index != -1): print "Initiating Final Testing ................." [testXData, testYData] = func.parseInfoTest(testFileHandle, FSize) TestMistakes = func.LogRegTest(Wvec, testXData, testYData) #print "Test Data size = ", len(testXData) print "Final Testing Completed................." FinalTestAcc = 100 * float(len(testXData) - TestMistakes) / len(testXData) print "Final Test Accuracy = %8s" % FinalTestAcc, "%\n\n" ##------- Negative log-likelihood Plot --------------- print "\n\n###########################################" print "Plotting the Negative Log Likelihood" print "###########################################" xplot = [neglogdata[i][0] for i in range(0, len(neglogdata))] yplot = [neglogdata[i][1] for i in range(0, len(neglogdata))] plt.plot(xplot, yplot) plt.ylabel('Negative Log Likelihood') plt.xlabel('Epochs') plt.show()
##--------- Repeated random train/test evaluation for one tool ---------##
# For the current `tool`, run `epoch` rounds.  Each round draws a training
# sample from the tool's data file, fits a linear regression on it and scores
# the fit on the lines that were not drawn.  The mean of the per-round
# accuracies, times 100, is stored in accuracy_lin[tool].
w_temp2 = {}              # round index -> learned weights
TrainToolsDict_temp = {}  # round index -> result of getPredictedScoreError
accuracy_lst = []         # one held-out accuracy per round

##--------- Get the training data ---------##
dataFileName = Dpath + "/Training/" + tool + ".data"
# Close the file deterministically instead of leaving it to the collector.
with open(dataFileName, "rb") as dataFile:
    allData = dataFile.read().splitlines()

for t in range(epoch):
    random.shuffle(allData)
    trainLength = int(len(allData) * 0.6)
    testLength = len(allData) - trainLength

    # random.choice draws with replacement, so trainData may contain
    # duplicates and usually covers fewer than trainLength distinct lines.
    trainData = [random.choice(allData) for _ in range(trainLength)]
    [xTrainData, yTrainData] = func.parseInfo(trainData)
    w_temp2[t] = linearRegression.LinearRegression(xTrainData, yTrainData)

    ##---- Get the error norm and score on the training data -----##
    TrainToolsDict_temp[t] = func.getPredictedScoreError(
        xTrainData, yTrainData, w_temp2[t])

    # Held-out set: every line that was never drawn.  A set makes each
    # membership test O(1) instead of scanning the whole training list; the
    # lines are strings, so the result is identical.
    sampledLines = set(trainData)
    testData = [item for item in allData if item not in sampledLines]
    [xTestData, yTestData] = func.parseInfo(testData)
    accuracy_lst.append(func.accuracy(w_temp2[t], xTestData, yTestData))

## --- Get the mean of accuracies --- ##
accuracy_lin[tool] = sum(accuracy_lst) / len(accuracy_lst) * 100
TrainToolsDict = {} for tool in trainTools: ##--------- Get the training data ---------## trainFileName = Dpath+"/Training/"+tool+".data" #trainData = csv.reader(open(trainFileName, "rb"), delimiter=" ") trainData = open(trainFileName, "rb").read().splitlines() trainLength = int(len(trainData) * 0.6) testLength = len(trainData) - trainLength trainData2 = [] for i in range(trainLength): trainData2.append(random.choice(trainData)) [xdata,ydata] = func.parseInfo(trainData2) w_temp = linearRegression.LinearRegression(xdata, ydata) w[tool] = w_temp ##---- Get the error norm and score on the training data -----## TrainToolsDict[tool] = func.getPredictedScoreError(xdata,ydata, w_temp) #print "Completed Linear Regression learning .....................\n" func.reportTrend(TrainToolsDict, "Linear: Training on SVComp14") # print "w:" , w ##------ Testing on SVComp15 (Predict the overall Winner)---------- if(testId!=-1 and trainId!=-1): ##indicates training dataStructure is available TestToolDict = {}