示例#1
0
def runTrainError(dataFile, classFile, vocab, samples):
    print "Samples:" , samples
    print "Vocab size:", vocab
    data = ExampleData(int(samples), int(vocab))
    data.readDataVectorFile(dataFile)
    data.readClassificationFile(classFile)
    
    learner = Learner(data)
    error = dict()
    for cl in learner.clf.keys():
        learner.learn(cl)
        error[cl] = learner.test(cl, data.Xdata, data.Ydata)
    
    print "NB train acc:%f\tLinearSVC train acc:%f\tSVM train acc:%f\tSGD train acc:%f" %(error["NB"], error["LinearSVC"], error["SVM"], error["SGD"])
示例#2
0
def runML(dataFile, classFile, vocab, samples):
    print "Samples:" , samples
    print "Vocab size:", vocab
    data = ExampleData(int(samples), int(vocab))
    data.readDataVectorFile(dataFile)
    data.readClassificationFile(classFile)
    avgNB = 0
    avgLin = 0
    avgSVC = 0
    avgSGD = 0
    numTrials = 5
    learner = Learner(None)
    for n in range(numTrials):
     
        
        score = learner.test_kf("NB", data.Xdata, data.Ydata)
        meanNB = score.mean()
        avgNB += meanNB
        print "NB:", meanNB
        score = learner.test_kf("LinearSVC", data.Xdata, data.Ydata)
        meanLin = score.mean()
        avgLin +=  meanLin
        print "LinearSVC:", meanLin
        score = learner.test_kf("SGD", data.Xdata, data.Ydata)
        meanSGD = score.mean()
        avgSGD +=  meanSGD
        print "SGD:", meanSGD
        score = learner.test_kf("SVM", data.Xdata, data.Ydata)
        meanSVC = score.mean()
        avgSVC +=  meanSVC
        print "SVM:", meanSVC
        
        
    avgNB /= numTrials
    avgLin /=numTrials
    avgSGD /=numTrials
    avgSVC /=numTrials
    print "NB Avg:%f\tLinearSVC Avg:%f\tSVM Avg:%f\tSGD Avg:%f" %(avgNB, avgLin, avgSVC, avgSGD)