Пример #1
0
def Run_Q3_3_2_sp():
    print '*********************************************************************************'
    print "//Question-3.3.2 (Simple Perceptron-single pass) Report on Adult Data -----------------------------"
    print '*********************************************************************************'
    [XData, YData, FSize, SeenWt] = func.parseInfo(trainFileHandle)
    maxVecLen = max(FSize)
    margin = 0
    for i in range(1, 2):
        for j in LRateList:
            [Bias, Wvec,
             LearningMistake] = func.Perceptron(j, 1, XData, YData, maxVecLen,
                                                i, margin, 0, SeenWt)
            mistakeCount = func.TestPerceptron(XData, YData, Wvec, Bias,
                                               maxVecLen, margin)
            TrainAccuracy = (
                (float(len(XData) - mistakeCount)) / len(XData)) * 100
            print 'Using Learning Rate = ', j, ' number of epochs = Sinlge Pass,  Initialization = random with mean 0 and sd 0.01'
            #	print '\t Learned Weight Vector    = ', Wvec
            print '\t Learned Bias             = ', Bias
            print '\t Mistakes during Learning = ', LearningMistake
            print '\t Learning Accuracy        = ', TrainAccuracy, '%\n'
            if (test_index != -1):
                [testXData,
                 testYData] = func.parseInfoTest(testFilehandle, maxVecLen)
                testMistake = func.TestPerceptron(testXData, testYData, Wvec,
                                                  Bias, maxVecLen, margin)
                TestAccuracy = ((float(len(testXData) - testMistake)) /
                                len(testXData)) * 100
                print '\t Test Accuracy            = ', TestAccuracy, '%\n'
    print "//End of Question-3.3.2(simple Perceptron-single pass) Report ------------------------------------\n\n\n"
Пример #2
0
def Run_Q3_grad_with_Shuffle():
    print '*********************************************************************************'
    print "//Question-3: For Grads (Aggressive Perceptron-batch 3,4,5) With Shuffle Report on Adult Data Set -------------"
    print '*********************************************************************************'
    [XData, YData, FSize, SeenWt] = func.parseInfo(trainFileHandle)
    maxVecLen = max(FSize)
    for i in range(3, 6):
        for k in MarginList:
            [Bias, Wvec, LearningMistake
             ] = func.AggressivePerceptron(1, XData, YData, maxVecLen, i, k, 1,
                                           SeenWt)
            mistakeCount = func.TestMarginPerceptron(XData, YData, Wvec, Bias,
                                                     maxVecLen, k)
            TrainAccuracy = (
                (float(len(XData) - mistakeCount)) / len(XData)) * 100
            print 'Using Dynamic Learning rate, number of epochs = ', i, ', Margin = ', k, ', Initialization = random with mean 0 and sd 0.01, with Shuffle'
            #print '\t Learned Weight Vector    = ', Wvec
            print '\t Learned Bias             = ', Bias
            print '\t Mistakes during Learning = ', LearningMistake
            print '\t Learning Accuracy        = ', TrainAccuracy, '%\n'
            if (test_index != -1):
                [testXData,
                 testYData] = func.parseInfoTest(testFilehandle, maxVecLen)
                testMistake = func.TestMarginPerceptron(
                    testXData, testYData, Wvec, Bias, maxVecLen, k)
                TestAccuracy = ((float(len(testXData) - testMistake)) /
                                len(testXData)) * 100
                print '\t Test Accuracy            = ', TestAccuracy, '%\n'

    print "//End of Question-3: For Grads(Aggressive Perceptron-Batch Mode-Shuffle Report -----------------------------\n\n\n"
Пример #3
0
def Run_Q3_3_3_mp_batch_noShuffle():
    print '***************************************************************************************************'
    print "//Question-3.3.2 (Margin Perceptron-batch 3,4,5) No Shuffle Report on Adult Data -----------------------------"
    print '***************************************************************************************************'
    [XData, YData, FSize, SeenWt] = func.parseInfo(trainFileHandle)
    maxVecLen = max(FSize)
    for i in range(3, 6):
        for j in LRateList:
            for k in MarginList:
                [Bias, Wvec,
                 LearningMistake] = func.Perceptron(j, 1, XData, YData,
                                                    maxVecLen, i, k, 0, SeenWt)
                mistakeCount = func.TestPerceptron(XData, YData, Wvec, Bias,
                                                   maxVecLen, k)
                TrainAccuracy = (
                    (float(len(XData) - mistakeCount)) / len(XData)) * 100
                print 'Using Learning Rate = ', j, ', number of epochs = ', i, '  Margin = ', k, ', Initialization = random with mean 0 and sd 0.01'
                #	print '\t Learned Weight Vector    = ', Wvec
                print '\t Learned Bias             = ', Bias
                print '\t Mistakes during Learning = ', LearningMistake
                print '\t Learning Accuracy        = ', TrainAccuracy, '%\n'
                if (test_index != -1):
                    [testXData,
                     testYData] = func.parseInfoTest(testFilehandle, maxVecLen)
                    testMistake = func.TestPerceptron(testXData, testYData,
                                                      Wvec, Bias, maxVecLen, k)
                    TestAccuracy = ((float(len(testXData) - testMistake)) /
                                    len(testXData)) * 100
                    print '\t Test Accuracy            = ', TestAccuracy, '%\n'
    print "//End of Question-3.3.2(Margin Perceptron-Batch Mode-NoShuffle) Report ------------------------------------\n\n\n"
Пример #4
0
def Run_Q1():

    [XData, YData, FSize] = func.parseInfo(trainFileHandle)
    print "YData: ", len(YData), len(XData), FSize
    sigmaSq = 1000
    lr = 0.1
    [WVec, LearningMistake, lr,
     neglogdata] = func.LogReg(XData, YData, FSize, sigmaSq, lr, 100, 1)
    #[WVec, LearningMistake,lr] = func.LogReg(XData, YData, FSize, sigmaSq, lr, 20,0)
    FinalTrainAcc = 100 * float(len(XData) - LearningMistake) / len(XData)
    print "Final Training Accuracy = ", FinalTrainAcc, " %"
    if (test_index != -1):
        [testXData, testYData] = func.parseInfoTest(testFileHandle, FSize)
        print "From Test:", len(testXData), len(testYData)
        TestMistakes = func.LogRegTest(WVec, testXData, testYData)
        FinalTestAcc = 100 * float(len(testXData) -
                                   TestMistakes) / len(testXData)
        print "Final Test Accuracy = ", FinalTestAcc, " %"
        #print WVec
    ##------- Negative log-likelihood Plot ---------------
    xplot = [neglogdata[i][0] for i in range(0, len(neglogdata))]
    yplot = [neglogdata[i][1] for i in range(0, len(neglogdata))]
    plt.plot(xplot, yplot, 'bs')
    plt.ylabel('Negative Log Likelihood')
    plt.xlabel('Epochs')
    plt.show()
Пример #5
0
def Run_Q3_3_1():
    print '*********************************************************************************'
    print "//Question-3.3.1 Report (Sanity check using Table2 -----------------------------"
    print '*********************************************************************************'
    [XData, YData, FSize, SeenWt] = func.parseInfo(sanityTable)
    maxVecLen = max(FSize)
    for i in range(1, 2):
        for j in LRateList:
            [Bias, Wvec,
             LearningMistake] = func.Perceptron(j, 0, XData, YData, maxVecLen,
                                                i, 0, 0, SeenWt)
            mistakeCount = func.TestPerceptron(XData, YData, Wvec, Bias,
                                               maxVecLen, 0)
            TrainAccuracy = (float(len(XData) - mistakeCount) /
                             len(XData)) * 100
            print 'Using Learning Rate = ', j, ' number of epochs = ', i, 'Initialization = @ default 0 , No-Shuffle'
            print '\t Learned Weight Vector    = ', Wvec
            print '\t Learned Bias             = ', Bias
            print '\t Mistakes during Learning = ', LearningMistake
            print '\t Learning Accuracy        = ', TrainAccuracy, '%\n'
    print "//End of Question-3.3.1 Report -------------------------------------------------\n\n"
Пример #6
0
def Run_kvalidate():
    [XData, YData, FSize] = func.parseInfo(trainFileHandle)
    bestSigmaSq = 0
    bestlr = 0
    MaxAccuracy = 0
    epochs = 20
    #lr = 0.00001

    kfoldData = []
    print "\n################################################################"
    print "        Starting 5 Fold Cross Validation                        "
    print "################################################################\n"
    for lr0 in lrlist:
        for sigmaSq in SigmaSqList:
            blockSize = len(XData) / foldValue
            testAcc = 0
            trainAcc = 0
            for k in range(0, foldValue):
                #print "SigmaSq = ",sigmaSq, ", LR0 = ",lr0
                KXTest = XData[k * blockSize:(k + 1) * blockSize]
                KYTest = YData[k * blockSize:(k + 1) * blockSize]
                KXData = [
                    XData[i] for i in range(0, len(XData))
                    if (i < k * blockSize or i >= (k + 1) * blockSize)
                ]
                KYData = [
                    YData[i] for i in range(0, len(XData))
                    if (i < k * blockSize or i >= (k + 1) * blockSize)
                ]
                [Wvec, trainMist, lr] = func.LogReg(KXData, KYData, FSize,
                                                    sigmaSq, lr0, epochs, 0)
                testMist = func.LogRegTest(Wvec, KXTest, KYTest)
                trainAcc = trainAcc + 100 * float(
                    (len(KXData) - trainMist)) / len(KXData)
                testAcc = testAcc + 100 * float(
                    (len(KXTest) - testMist)) / len(KXTest)
            avgtrainacc = trainAcc / foldValue
            avgtestacc = testAcc / foldValue
            print "HyperParameter INFO:: %15s" % "SigmaSquare", " = %6s" % sigmaSq, ": %20s" % " LearningRate", " = %6s" % lr0, ": %10s" % " TrainAcc", " = %8s" % avgtrainacc, ": %10s" % " TestAcc", " = %8s " % avgtestacc
            kfoldData.append([sigmaSq, lr0, avgtrainacc, avgtestacc])
            if (MaxAccuracy < avgtestacc):
                MaxAccuracy = avgtestacc
                bestSigmaSq = sigmaSq
                bestlr = lr0
            print "HyperParameter INFO:: %15s" % "Best SigmaSq", " = %6s" % bestSigmaSq, ": %20s" % "BestLearningRate", " = %6s" % bestlr, "\n\n"

    print "\n\n******************************************************"
    print "HyperParameter Info:: Best Found SigmaSquare = %8s" % bestSigmaSq, ": Best Initial Learning Rate = %8s" % bestlr
    print "******************************************************\n\n"

    ##----------- Cross Validation Report ------------------------
    #	for i in kfoldData:
    #		print "SigmaSquare = ", i[0], "LearningRate = ", i[1], ", TrainAcc = ",i[2], ", TestAcc = ",i[3]

    ##----------- Now learn on the entire data set --------------------##
    print "Starting Final Training (On 80 Epochs)..................."
    [Wvec, trainMist, lr, neglogdata] = func.LogReg(XData, YData, FSize,
                                                    bestSigmaSq, bestlr, 80, 1)
    print "Training Completed ......................."
    FinalTrainAcc = 100 * float(len(XData) - trainMist) / len(XData)
    print "Final Training Accuracy = %8s" % FinalTrainAcc, "%\n\n"
    #print "Train Data size = ", len(XData)
    if (test_index != -1):
        print "Initiating Final Testing ................."
        [testXData, testYData] = func.parseInfoTest(testFileHandle, FSize)
        TestMistakes = func.LogRegTest(Wvec, testXData, testYData)
        #print "Test Data size = ", len(testXData)
        print "Final Testing Completed................."
        FinalTestAcc = 100 * float(len(testXData) -
                                   TestMistakes) / len(testXData)
        print "Final Test Accuracy = %8s" % FinalTestAcc, "%\n\n"

    ##------- Negative log-likelihood Plot ---------------
    print "\n\n###########################################"
    print "Plotting the Negative Log Likelihood"
    print "###########################################"
    xplot = [neglogdata[i][0] for i in range(0, len(neglogdata))]
    yplot = [neglogdata[i][1] for i in range(0, len(neglogdata))]
    plt.plot(xplot, yplot)
    plt.ylabel('Negative Log Likelihood')
    plt.xlabel('Epochs')
    plt.show()
Пример #7
0
        # Per-tool repeated train/test evaluation of the linear regression.
        # All three containers are indexed by (or appended once per) round t.
        w_temp2 = {}  # round -> result of linearRegression.LinearRegression
        TrainToolsDict_temp = {}  # round -> func.getPredictedScoreError result
        accuracy_lst = []  # one func.accuracy value per round
        ##--------- Get the training data ---------##
        dataFileName = Dpath + "/Training/" + tool + ".data"
        # Whole file is read at once and split into lines; the file object is
        # not closed explicitly.
        allData = open(dataFileName, "rb").read().splitlines()

        for t in range(epoch):
            # Shuffles allData in place on every round.
            random.shuffle(allData)
            trainLength = int(len(allData) * 0.6)
            # testLength is not used in the lines visible here.
            testLength = len(allData) - trainLength
            trainData = []
            # random.choice draws independently, so this samples WITH
            # replacement: trainData may contain the same line several times.
            for i in range(trainLength):
                trainData.append(random.choice(allData))

            [xTrainData, yTrainData] = func.parseInfo(trainData)
            w_temp2[t] = linearRegression.LinearRegression(
                xTrainData, yTrainData)

            ##---- Get the error norm and score on the training data -----##
            TrainToolsDict_temp[t] = func.getPredictedScoreError(
                xTrainData, yTrainData, w_temp2[t])
            # Held-out set = every line that was never drawn into trainData.
            # Because of the sampling with replacement it can hold more than
            # testLength lines.  The list membership test makes this step
            # quadratic in the number of lines.
            testData = [item for item in allData if item not in trainData]
            [xTestData, yTestData] = func.parseInfo(testData)
            accuracy_lst.append(func.accuracy(w_temp2[t], xTestData,
                                              yTestData))

        #func.reportAccuracy(tool, accuracy_lin[tool])

        ## --- Get the mean of accuracies (scaled by 100) --- ##
        # Raises ZeroDivisionError when no round ran (epoch == 0).
        accuracy_lin[tool] = sum(accuracy_lst) / len(accuracy_lst) * 100
Пример #8
0
        TrainToolsDict = {}
        for tool in trainTools:
             ##--------- Get the training data ---------##
             trainFileName = Dpath+"/Training/"+tool+".data"

             #trainData = csv.reader(open(trainFileName, "rb"), delimiter=" ")
	     
	     trainData = open(trainFileName, "rb").read().splitlines()	     
	     
	     trainLength = int(len(trainData) * 0.6)
	     testLength = len(trainData) - trainLength
	     trainData2 = []
	     for i in range(trainLength):
		trainData2.append(random.choice(trainData))
	
	     [xdata,ydata] = func.parseInfo(trainData2)
             w_temp = linearRegression.LinearRegression(xdata, ydata)
             w[tool] = w_temp

			 ##---- Get the error norm and score on the training data -----##
             TrainToolsDict[tool] = func.getPredictedScoreError(xdata,ydata, w_temp)
		#print "Completed Linear Regression learning .....................\n"

        func.reportTrend(TrainToolsDict, "Linear: Training on SVComp14")

     #   print "w:" , w

##------ Testing on SVComp15 (Predict the overall Winner)----------
if(testId!=-1 and trainId!=-1): ##indicates training dataStructure is available
	
	TestToolDict = {}