def getSetup(learnerName, dataDir, outputDir, numProcesses):
    """
    Return the learner, dataset-loading method, directories and parameter
    grid for the given learner name.

    :param learnerName: one of "SVM", "SVR" or "CART"
    :param dataDir: base data directory (a learner-specific subdirectory is appended)
    :param outputDir: base output directory (a learner-specific subdirectory is appended)
    :param numProcesses: number of processes the learner may use
    :raises ValueError: if learnerName is not recognised
    :return: (learner, loadMethod, dataDir, outputDir, paramDict)
    """
    if learnerName == "SVM":
        learner = LibSVM(kernel='gaussian', type="C_SVC", processes=numProcesses)
        loadMethod = ModelSelectUtils.loadRatschDataset
        dataDir += "benchmark/"
        outputDir += "classification/" + learnerName + "/"

        paramDict = {}
        paramDict["setC"] = learner.getCs()
        paramDict["setGamma"] = learner.getGammas()
    elif learnerName == "SVR":
        learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses)
        learner.normModelSelect = True
        loadMethod = ModelSelectUtils.loadRegressDataset
        dataDir += "regression/"
        outputDir += "regression/" + learnerName + "/"

        paramDict = {}
        # numpy.float was a deprecated alias of the builtin float (removed in
        # NumPy 1.24); use the builtin directly.
        paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=float)
        paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=float)
        paramDict["setEpsilon"] = learner.getEpsilons()
    elif learnerName == "CART":
        learner = DecisionTreeLearner(criterion="mse", maxDepth=30, minSplit=1,
                                      pruneType="CART", processes=numProcesses)
        learner.setChunkSize(2)
        loadMethod = ModelSelectUtils.loadRegressDataset
        dataDir += "regression/"
        outputDir += "regression/" + learnerName + "/"

        paramDict = {}
        # Grid of rounded 2**k - 1 values; numpy.int is likewise a removed
        # alias of the builtin int.
        paramDict["setGamma"] = numpy.array(numpy.round(2**numpy.arange(1, 7.5, 0.5) - 1), dtype=int)
    else:
        raise ValueError("Unknown learnerName: " + learnerName)

    return learner, loadMethod, dataDir, outputDir, paramDict
def computeIdealPenalty(args):
    """
    Find the complete penalty: the Bayes (true) error of an SVM trained on
    (X, y), minus its training error.
    """
    (X, y, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X) = args

    model = LibSVM('gaussian', gamma, C)
    model.learnModel(X, y)

    trainPredictions = model.predict(X)
    fullPredictions, fullDecisions = model.predict(fullX, True)

    # Reshape the decision values onto the 2D evaluation grid (Fortran order
    # matches how the grid points were laid out).
    numGrid = gridPoints.shape[0]
    decisionGrid = numpy.reshape(fullDecisions, (numGrid, numGrid), order="F")
    trueError = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)

    return trueError - Evaluator.binaryError(trainPredictions, y)
def testGetModel(self):
    """Smoke test: learn a model on random binary data and extract the weights."""
    try:
        import sklearn
    except ImportError:
        return

    numExamples, numFeatures = 50, 3
    generator = ExamplesGenerator()
    X, y = generator.generateBinaryExamples(numExamples, numFeatures)

    svm = LibSVM()
    svm.learnModel(X, y)
    weights, b = svm.getWeights()
def testStr(self):
    """Smoke test: constructing a LibSVM must not raise."""
    try:
        import sklearn
    except ImportError:
        return

    svm = LibSVM()
def testPredict(self):
    """Learn on random linearly-generated labels and predict with decision values."""
    try:
        import sklearn
    except ImportError:
        return

    numExamples, numFeatures = 100, 10
    X = numpy.random.randn(numExamples, numFeatures)
    c = numpy.random.randn(numFeatures)

    # Labels in {-1, +1} from the sign of a random linear score.
    scores = numpy.dot(X, numpy.array([c]).T).ravel()
    y = numpy.array(scores > 0, numpy.int32) * 2 - 1

    svm = LibSVM()
    svm.learnModel(X, y)
    y2, d = svm.predict(X, True)
def testPredict(self):
    """Fit a classifier to randomly generated data and request decision values."""
    try:
        import sklearn
    except ImportError:
        return

    numExamples = 100
    numFeatures = 10

    X = numpy.random.randn(numExamples, numFeatures)
    coef = numpy.random.randn(numFeatures)
    linearScore = numpy.dot(X, numpy.array([coef]).T).ravel()

    # Map positive scores to +1 and the rest to -1.
    y = numpy.array(linearScore > 0, numpy.int32) * 2 - 1

    svm = LibSVM()
    svm.learnModel(X, y)
    y2, d = svm.predict(X, True)
def testComputeTestError(self):
    """
    computeTestError must agree with training an SVM directly and measuring
    the binary error on the held-out half of the data.
    """
    C = 10.0
    gamma = 0.5

    # Integer division: the original used shape[0]*0.5, a float, and float
    # slice indices raise TypeError in Python 3.
    numTrainExamples = self.X.shape[0] // 2
    trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
    testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

    svm = LibSVM('gaussian', gamma, C)
    args = (trainX, trainY, testX, testY, svm)
    error = computeTestError(args)

    svm = LibSVM('gaussian', gamma, C)
    svm.learnModel(trainX, trainY)
    predY = svm.predict(testX)

    self.assertEquals(Evaluator.binaryError(predY, testY), error)
def profileParallelPen(self):
    """Profile LibSVM.parallelPen over cross-validation folds on random binary data."""
    learner = LibSVM(processes=8)
    learner.setChunkSize(2)

    numExamples = 10000
    numFeatures = 10
    X = numpy.random.rand(numExamples, numFeatures)
    # numpy.int was a deprecated alias of the builtin int (removed in
    # NumPy 1.24); use the builtin directly. Labels are in {-1, +1}.
    Y = numpy.array(numpy.random.rand(numExamples) < 0.1, int) * 2 - 1
    Cvs = [self.folds - 1]

    def run():
        for i in range(2):
            print("Iteration " + str(i))
            idx = Sampling.crossValidation(self.folds, numExamples)
            learner.parallelPen(X, Y, idx, self.paramDict, Cvs)

    ProfileUtils.profile('run()', globals(), locals())
def profileParallelPen(self):
    """Profile parallelPen on a large random dataset with ~10% positive labels."""
    learner = LibSVM(processes=8)
    learner.setChunkSize(2)

    numExamples = 10000
    numFeatures = 10
    X = numpy.random.rand(numExamples, numFeatures)
    # Use the builtin int: numpy.int was removed in NumPy 1.24.
    Y = numpy.array(numpy.random.rand(numExamples) < 0.1, int) * 2 - 1
    Cvs = [self.folds - 1]

    def run():
        for i in range(2):
            print("Iteration " + str(i))
            idx = Sampling.crossValidation(self.folds, numExamples)
            learner.parallelPen(X, Y, idx, self.paramDict, Cvs)

    ProfileUtils.profile('run()', globals(), locals())
def testGetWeights(self):
    """Check that decision values can be reconstructed from the learned weights."""
    try:
        import sklearn
    except ImportError:
        return

    # Tiny 1D dataset with an obvious separating point at zero.
    numExamples = 6
    X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
    y = numpy.array([[-1], [-1], [-1], [1], [1], [1]])

    svm = LibSVM()
    svm.learnModel(X, y.ravel())
    weights, b = svm.getWeights()

    # Decision values should equal X.w - b exactly on this dataset.
    y, decisions = svm.predict(X, True)
    decisions2 = numpy.dot(X, weights) - b

    self.assertTrue((decisions == decisions2).all())
    self.assertTrue((y.ravel() == numpy.sign(decisions2)).all())

    # Do the same test on a random dataset, allowing numerical tolerance
    # (note the sign of b flips here, matching the original test).
    numExamples = 50
    numFeatures = 10
    X = numpy.random.rand(numExamples, numFeatures)
    y = numpy.sign(numpy.random.rand(numExamples) - 0.5)

    svm = LibSVM()
    svm.learnModel(X, y.ravel())
    weights, b = svm.getWeights()

    y, decisions = svm.predict(X, True)
    decisions2 = numpy.dot(X, weights) + b

    tol = 10**-6
    self.assertTrue(numpy.linalg.norm(decisions - decisions2) < tol)
    self.assertTrue((y.ravel() == numpy.sign(decisions2)).all())
def setUp(self):
    """Create a fixed random binary dataset and a LibSVM with small parameter grids."""
    try:
        import sklearn
    except ImportError as error:
        logging.debug(error)
        return

    numpy.random.seed(21)
    numExamples = 100
    numFeatures = 10
    eg = ExamplesGenerator()

    self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures)
    self.svm = LibSVM()
    # numpy.float was a deprecated alias of the builtin float (removed in
    # NumPy 1.24); use the builtin directly.
    self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=float)
    self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=float)
    self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=float)

    numpy.set_printoptions(linewidth=150, suppress=True, precision=3)
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
def testLearnModel(self):
    """Learning should succeed on binary labels, reject bad labels, and support SVR."""
    try:
        import sklearn
    except ImportError as error:
        return

    self.svm.learnModel(self.X, self.y)
    predY = self.svm.classify(self.X)
    y = self.y
    e = Evaluator.binaryError(y, predY)

    # Test for wrong labels: values other than -1/+1 must be rejected.
    # (float replaces the removed numpy.float alias.)
    numExamples = 6
    X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], float)
    y = numpy.array([[-1], [-1], [-1], [1], [1], [5]])
    self.assertRaises(ValueError, self.svm.learnModel, X, y)

    # Try the regression SVM on real-valued targets. The original trained on
    # self.y, leaving the freshly generated y unused — train on y instead.
    svm = LibSVM(type="Epsilon_SVR")
    y = numpy.random.rand(self.X.shape[0])
    svm.learnModel(self.X, y)
def testComputeBootstrapError(self):
    """Smoke test for computeBootstrapError on a half/half split."""
    C = 10.0
    gamma = 0.5

    # Integer division: a float slice index (shape[0]*0.5) raises TypeError
    # in Python 3.
    numTrainExamples = self.X.shape[0] // 2
    trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
    testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

    svm = LibSVM('gaussian', gamma, C)
    args = (trainX, trainY, testX, testY, svm)
    error = computeBootstrapError(args)
def testLearnModel(self):
    """Learning should work for classification, reject bad labels, and support SVR."""
    try:
        import sklearn
    except ImportError as error:
        return

    self.svm.learnModel(self.X, self.y)
    predY = self.svm.classify(self.X)
    y = self.y
    e = Evaluator.binaryError(y, predY)

    # Test for wrong labels (5 is not a valid binary label).
    # float replaces the removed numpy.float alias.
    numExamples = 6
    X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], float)
    y = numpy.array([[-1], [-1], [-1], [1], [1], [5]])
    self.assertRaises(ValueError, self.svm.learnModel, X, y)

    # Try the regression SVM using the generated real-valued targets; the
    # original mistakenly passed self.y and never used y.
    svm = LibSVM(type="Epsilon_SVR")
    y = numpy.random.rand(self.X.shape[0])
    svm.learnModel(self.X, y)
def profileModelSelect(self):
    """Profile LibSVM.parallelModelSelect on a large random binary dataset."""
    learner = LibSVM()
    numExamples = 10000
    numFeatures = 10

    X = numpy.random.rand(numExamples, numFeatures)
    # Use the builtin int: numpy.int was removed in NumPy 1.24.
    Y = numpy.array(numpy.random.rand(numExamples) < 0.1, int) * 2 - 1

    def run():
        for i in range(5):
            print("Iteration " + str(i))
            idx = Sampling.crossValidation(self.folds, numExamples)
            learner.parallelModelSelect(X, Y, idx, self.paramDict)

    ProfileUtils.profile('run()', globals(), locals())
def __init__(self, examplesFileName):
    """
    Create the class by reading examples from a Matlab file. Instantiate the
    SVM and create a preprocessor to standardise examples to have zero mean
    and unit variance.
    """
    self.examplesList = ExamplesList.readFromFile(examplesFileName)
    self.examplesList.setDefaultExamplesName("X")
    self.examplesList.setLabelsName("y")

    # Log the label distribution and the error rate of always predicting
    # the majority class.
    labels = self.examplesList.getSampledDataField("y").ravel()
    (freqs, items) = Util.histogram(labels)
    logging.info("Distribution of labels: " + str((freqs, items)))
    baseRate = float(min(freqs)) / self.examplesList.getNumExamples()
    logging.info("The base error rate is " + str(baseRate))

    self.classifier = LibSVM()
    self.errorMethod = Evaluator.balancedError

    # Standardise the examples in place.
    self.preprocessor = Standardiser()
    examplesName = self.examplesList.getDefaultExamplesName()
    standardX = self.preprocessor.standardiseArray(self.examplesList.getDataField(examplesName))
    self.examplesList.overwriteDataField(examplesName, standardX)
def testComputeTestError(self):
    """
    computeTestError must match the error of an SVM trained and evaluated
    directly on the same split.
    """
    C = 10.0
    gamma = 0.5

    # shape[0]*0.5 produced a float; float slice indices raise TypeError in
    # Python 3, so use integer division.
    numTrainExamples = self.X.shape[0] // 2
    trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
    testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

    svm = LibSVM('gaussian', gamma, C)
    args = (trainX, trainY, testX, testY, svm)
    error = computeTestError(args)

    svm = LibSVM('gaussian', gamma, C)
    svm.learnModel(trainX, trainY)
    predY = svm.predict(testX)

    self.assertEquals(Evaluator.binaryError(predY, testY), error)
def testComputeIdealPenalty(self):
    """Smoke test: computeIdealPenalty runs with the training set as the full set."""
    C = 10.0
    gamma = 0.5

    svm = LibSVM("gaussian", gamma, C)
    args = (self.X, self.y, self.X, self.y, svm)
    error = computeIdealPenalty(args)
def testSetErrorCost(self):
    """Raising the error cost should reduce the positive-class error rate."""
    try:
        import sklearn
    except ImportError:
        return

    numExamples = 1000
    numFeatures = 100
    eg = ExamplesGenerator()
    X, y = eg.generateBinaryExamples(numExamples, numFeatures)

    svm = LibSVM()
    svm.setKernel("linear", 0)
    svm.setC(0.1)

    # Train once with a low cost and once with a high cost on errors.
    svm.setErrorCost(0.1)
    svm.learnModel(X, y)
    e1 = Evaluator.binaryErrorP(y, svm.classify(X))

    svm.setErrorCost(0.9)
    svm.learnModel(X, y)
    e2 = Evaluator.binaryErrorP(y, svm.classify(X))

    self.assertTrue(e1 > e2)
Cs = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float) gammas = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float) epsilons = numpy.array([2**-2]) paramDict = {} paramDict["setC"] = Cs paramDict["setGamma"] = gammas paramDict["setEpsilon"] = epsilons sampleMethod = Sampling.crossValidation numProcesses = multiprocessing.cpu_count() j = 0 trainX, trainY, testX, testY = ModelSelectUtils.loadRegressDataset(dataDir, datasetName, j) learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses) for sampleSize in sampleSizes: print("Sample size " +str(sampleSize)) trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize] validX = trainX[trainInds,:] validY = trainY[trainInds] #errors = learner.parallelPenaltyGrid(validX, validY, testX, testY, paramDict, computeTestError) #errors = numpy.squeeze(errors) errors = numpy.zeros((Cs.shape[0], gammas.shape[0])) norms = numpy.zeros((Cs.shape[0], gammas.shape[0])) for i, C in enumerate(Cs):
i = 0 datasetName = datasetNames[i][0] numRealisations = datasetNames[i][1] logging.debug("Learning using dataset " + datasetName) data = numpy.load(dataDir + datasetName + ".npz") gridPoints, trainX, trainY, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"] #We form a test set from the grid points testX = numpy.zeros((gridPoints.shape[0]**2, 2)) for m in range(gridPoints.shape[0]): testX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints testX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m] svm = LibSVM() logging.debug("Using sample size " + str(sampleSize) + " and " + str(folds) + " folds") perm = numpy.random.permutation(trainX.shape[0]) trainInds = perm[0:sampleSize] validX = trainX[trainInds, :] validY = trainY[trainInds] logging.debug("Finding ideal grid of penalties") idealGrid = parallelPenaltyGridRbf(svm, validX, validY, testX, gridPoints, pdfX, pdfY1X, pdfYminus1X) for s in range(len(sampleMethods)): sampleMethod = sampleMethods[s][1] logging.debug("Sampling method :" + str(sampleMethod)) idx = sampleMethod(folds, validY.shape[0])
def testModelSelect(self):
    """
    We test the results on some data and compare to SVR.
    """
    numExamples = 200
    X, y = data.make_regression(numExamples, noise=0.5)

    # Standardise inputs and targets to zero mean / unit variance.
    X = Standardiser().standardiseArray(X)
    y = Standardiser().standardiseArray(y)

    # Half/half train-test split.
    trainX = X[0:100, :]
    trainY = y[0:100]
    testX = X[100:, :]
    testY = y[100:]

    # Model selection for a REP-CV pruned decision tree.
    learner = DecisionTreeLearner(maxDepth=20, minSplit=10, pruneType="REP-CV")
    learner.setPruneCV(8)

    paramDict = {}
    paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 10)
    paramDict["setPruneCV"] = numpy.arange(6, 11, 2, numpy.int)

    folds = 5
    idx = Sampling.crossValidation(folds, trainX.shape[0])
    bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)
    predY = bestTree.predict(testX)
    error = Evaluator.rootMeanSqError(testY, predY)
    print(error)

    # Same again with CART pruning and a finer gamma grid.
    learner = DecisionTreeLearner(maxDepth=20, minSplit=5, pruneType="CART")
    paramDict = {}
    paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 50)

    folds = 5
    idx = Sampling.crossValidation(folds, trainX.shape[0])
    bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)
    predY = bestTree.predict(testX)
    error = Evaluator.rootMeanSqError(testY, predY)
    print(error)

    # NOTE(review): this early return deliberately skips the SVM comparison
    # below — the remaining code is unreachable as written.
    return

    #Let's compare to the SVM
    learner2 = LibSVM(kernel='gaussian', type="Epsilon_SVR")

    paramDict = {}
    paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float)
    paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float)
    paramDict["setEpsilon"] = learner2.getEpsilons()

    idx = Sampling.crossValidation(folds, trainX.shape[0])
    bestSVM, cvGrid = learner2.parallelModelSelect(trainX, trainY, idx, paramDict)
    predY = bestSVM.predict(testX)
    error = Evaluator.rootMeanSqError(testY, predY)
    print(error)
def testSetSvmType(self):
    """Switching between Epsilon_SVR and C_SVC should learn sensible models."""
    try:
        import sklearn
    except ImportError as error:
        return

    numExamples = 100
    numFeatures = 10
    X = numpy.random.randn(numExamples, numFeatures)
    X = Standardiser().standardiseArray(X)
    c = numpy.random.randn(numFeatures)
    y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
    y2 = numpy.array(y > 0, numpy.int32) * 2 - 1

    svm = LibSVM()
    svm.setSvmType("Epsilon_SVR")
    self.assertEquals(svm.getType(), "Epsilon_SVR")

    # Try to get a good error: grid-search C and epsilon.
    # float replaces the removed numpy.float alias (NumPy 1.24).
    Cs = 2**numpy.arange(-6, 4, dtype=float)
    epsilons = 2**numpy.arange(-6, 4, dtype=float)

    bestError = 10
    for C in Cs:
        for epsilon in epsilons:
            svm.setEpsilon(epsilon)
            svm.setC(C)
            svm.learnModel(X, y)
            yp = svm.predict(X)

            if Evaluator.rootMeanSqError(y, yp) < bestError:
                bestError = Evaluator.rootMeanSqError(y, yp)

    # Best regression error must beat predicting all zeros.
    self.assertTrue(bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))

    svm.setSvmType("C_SVC")
    svm.learnModel(X, y2)
    yp2 = svm.predict(X)
    self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)
import matplotlib.pyplot as plt

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
numpy.seterr(all="raise")
numpy.random.seed(21)

# Input/output locations for the model-penalisation regression experiments.
dataDir = PathDefaults.getDataDir()
dataDir += "modelPenalisation/regression/"
outputDir = PathDefaults.getOutputDir() + "modelPenalisation/regression/SVR/"

figInd = 0

loadMethod = ModelSelectUtils.loadRegressDataset
datasets = ModelSelectUtils.getRegressionDatasets(True)
numProcesses = multiprocessing.cpu_count()

# Epsilon-SVR learner parallelised over all available cores.
learner = LibSVM(kernel="rbf", processes=numProcesses, type="Epsilon_SVR")
learner.setChunkSize(3)

# Parameter grids: C and gamma on log2 scales; epsilons come from the learner.
Cs = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float)
gammas = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float)
epsilons = learner.getEpsilons()

numCs = Cs.shape[0]
numGammas = gammas.shape[0]
numEpsilons = epsilons.shape[0]

learner.normModelSelect = True

paramDict = {}
paramDict["setC"] = Cs
paramDict["setGamma"] = gammas
def testSetSvmType(self):
    """Epsilon_SVR should beat a trivial baseline; C_SVC should classify validly."""
    try:
        import sklearn
    except ImportError as error:
        return

    numExamples = 100
    numFeatures = 10
    X = numpy.random.randn(numExamples, numFeatures)
    X = Standardiser().standardiseArray(X)
    c = numpy.random.randn(numFeatures)
    y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
    y2 = numpy.array(y > 0, numpy.int32) * 2 - 1

    svm = LibSVM()
    svm.setSvmType("Epsilon_SVR")
    self.assertEquals(svm.getType(), "Epsilon_SVR")

    # Try to get a good error by searching C and epsilon; float replaces the
    # removed numpy.float alias.
    Cs = 2**numpy.arange(-6, 4, dtype=float)
    epsilons = 2**numpy.arange(-6, 4, dtype=float)

    bestError = 10
    for C in Cs:
        for epsilon in epsilons:
            svm.setEpsilon(epsilon)
            svm.setC(C)
            svm.learnModel(X, y)
            yp = svm.predict(X)

            if Evaluator.rootMeanSqError(y, yp) < bestError:
                bestError = Evaluator.rootMeanSqError(y, yp)

    self.assertTrue(bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))

    svm.setSvmType("C_SVC")
    svm.learnModel(X, y2)
    yp2 = svm.predict(X)
    self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)
def testSaveParams(self):
    """Saved parameters must round-trip through loadParams onto a fresh SVM."""
    try:
        import sklearn
    except ImportError as error:
        return

    svm = LibSVM()
    svm.setC(10.5)
    svm.setEpsilon(12.1)
    svm.setErrorCost(1.8)
    svm.setSvmType("Epsilon_SVR")
    svm.setTermination(0.12)
    svm.setKernel("gaussian", 0.43)

    outputDir = PathDefaults.getOutputDir()
    fileName = outputDir + "test/testSvmParams"
    svm.saveParams(fileName)

    svm2 = LibSVM()
    svm2.loadParams(fileName)

    # Assert on the freshly-loaded svm2: the original asserted on svm (the
    # saver), so the load step was never actually verified.
    self.assertEquals(svm2.getC(), 10.5)
    self.assertEquals(svm2.getEpsilon(), 12.1)
    self.assertEqual(svm2.getErrorCost(), 1.8)
    self.assertEqual(svm2.getSvmType(), "Epsilon_SVR")
    self.assertEqual(svm2.getTermination(), 0.12)
    self.assertEqual(svm2.getKernel(), "gaussian")
    self.assertEqual(svm2.getKernelParams(), 0.43)
def testSetEpsilon(self):
    """
    Test out the parameter for the regressive SVM: vary epsilon and check
    the number of support vectors (fewer SVs for larger epsilon).
    """
    try:
        import sklearn
    except ImportError as error:
        return

    svm = LibSVM()
    svm.setC(10.0)
    svm.setEpsilon(0.1)
    svm.setSvmType("Epsilon_SVR")

    numExamples = 100
    numFeatures = 10
    X = numpy.random.randn(numExamples, numFeatures)
    c = numpy.random.randn(numFeatures)
    y = numpy.dot(X, numpy.array([c]).T).ravel() + numpy.random.randn(100)

    # Compare support-vector counts as integers: the original compared the
    # raw shape tuples, which only works by lexicographic accident.
    svm.setEpsilon(1.0)
    svm.learnModel(X, y)
    numSV = svm.getModel().support_.shape[0]

    svm.setEpsilon(0.5)
    svm.learnModel(X, y)
    numSV2 = svm.getModel().support_.shape[0]

    svm.setEpsilon(0.01)
    svm.learnModel(X, y)
    numSV3 = svm.getModel().support_.shape[0]

    # There should be fewer SVs as epsilon increases
    self.assertTrue(numSV < numSV2)
    self.assertTrue(numSV2 < numSV3)
class LibSVMTest(unittest.TestCase): def setUp(self): try: import sklearn except ImportError as error: logging.debug(error) return numpy.random.seed(21) numExamples = 100 numFeatures = 10 eg = ExamplesGenerator() self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures) self.svm = LibSVM() self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=numpy.float) self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=numpy.float) self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=numpy.float) numpy.set_printoptions(linewidth=150, suppress=True, precision=3) logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) def testLearnModel(self): try: import sklearn except ImportError as error: return self.svm.learnModel(self.X, self.y) predY = self.svm.classify(self.X) y = self.y e = Evaluator.binaryError(y, predY) #Test for wrong labels numExamples = 6 X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float) y = numpy.array([[-1], [-1], [-1], [1], [1], [5]]) self.assertRaises(ValueError, self.svm.learnModel, X, y) #Try the regression SVM svm = LibSVM(type="Epsilon_SVR") y = numpy.random.rand(self.X.shape[0]) svm.learnModel(self.X, self.y) def testSetErrorCost(self): try: import sklearn except ImportError as error: return numExamples = 1000 numFeatures = 100 eg = ExamplesGenerator() X, y = eg.generateBinaryExamples(numExamples, numFeatures) svm = LibSVM() C = 0.1 kernel = "linear" kernelParam = 0 svm.setKernel(kernel, kernelParam) svm.setC(C) svm.setErrorCost(0.1) svm.learnModel(X, y) predY = svm.classify(X) e1 = Evaluator.binaryErrorP(y, predY) svm.setErrorCost(0.9) svm.learnModel(X, y) predY = svm.classify(X) e2 = Evaluator.binaryErrorP(y, predY) self.assertTrue(e1 > e2) def testClassify(self): try: import sklearn except ImportError as error: return self.svm.learnModel(self.X, self.y) predY = self.svm.classify(self.X) y = self.y e = Evaluator.binaryError(y, predY) #Now, permute examples perm = numpy.random.permutation(self.X.shape[0]) predY = 
self.svm.classify(self.X[perm, :]) y = y[perm] e2 = Evaluator.binaryError(y, predY) self.assertEquals(e, e2) def testEvaluateCv(self): try: import sklearn except ImportError as error: return folds = 10 (means, vars) = self.svm.evaluateCv(self.X, self.y, folds) self.assertTrue((means <= 1).all()) self.assertTrue((means >= 0).all()) self.assertTrue((vars <= 1).all()) self.assertTrue((vars >= 0).all()) @apgl.skip("") def testGetModel(self): try: import sklearn except ImportError as error: return numExamples = 50 numFeatures = 3 eg = ExamplesGenerator() X, y = eg.generateBinaryExamples(numExamples, numFeatures) svm = LibSVM() svm.learnModel(X, y) weights, b = svm.getWeights() #logging.debug(weights) #logging.debug(b) @apgl.skip("") def testGetWeights(self): try: import sklearn except ImportError as error: return numExamples = 6 X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64) #X = numpy.random.rand(numExamples, 10) y = numpy.array([[-1], [-1], [-1], [1], [1], [1]]) svm = LibSVM() svm.learnModel(X, y.ravel()) weights, b = svm.getWeights() #Let's see if we can compute the decision values y, decisions = svm.predict(X, True) decisions2 = numpy.zeros(numExamples) decisions2 = numpy.dot(X, weights) - b self.assertTrue((decisions == decisions2).all()) predY = numpy.sign(decisions2) self.assertTrue((y.ravel() == predY).all()) #Do the same test on a random datasets numExamples = 50 numFeatures = 10 X = numpy.random.rand(numExamples, numFeatures) y = numpy.sign(numpy.random.rand(numExamples) - 0.5) svm = LibSVM() svm.learnModel(X, y.ravel()) weights, b = svm.getWeights() #Let's see if we can compute the decision values y, decisions = svm.predict(X, True) decisions2 = numpy.dot(X, weights) + b tol = 10**-6 self.assertTrue(numpy.linalg.norm(decisions - decisions2) < tol) predY = numpy.sign(decisions2) self.assertTrue((y.ravel() == predY).all()) def testSetTermination(self): try: import sklearn except ImportError as error: return self.svm.learnModel(self.X, self.y) 
self.svm.setTermination(0.1) self.svm.learnModel(self.X, self.y) def testSetSvmType(self): try: import sklearn except ImportError as error: return numExamples = 100 numFeatures = 10 X = numpy.random.randn(numExamples, numFeatures) X = Standardiser().standardiseArray(X) c = numpy.random.randn(numFeatures) y = numpy.dot(X, numpy.array([c]).T).ravel() + 1 y2 = numpy.array(y > 0, numpy.int32) * 2 - 1 svm = LibSVM() svm.setSvmType("Epsilon_SVR") self.assertEquals(svm.getType(), "Epsilon_SVR") #Try to get a good error Cs = 2**numpy.arange(-6, 4, dtype=numpy.float) epsilons = 2**numpy.arange(-6, 4, dtype=numpy.float) bestError = 10 for C in Cs: for epsilon in epsilons: svm.setEpsilon(epsilon) svm.setC(C) svm.learnModel(X, y) yp = svm.predict(X) if Evaluator.rootMeanSqError(y, yp) < bestError: bestError = Evaluator.rootMeanSqError(y, yp) self.assertTrue( bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0]))) svm.setSvmType("C_SVC") svm.learnModel(X, y2) yp2 = svm.predict(X) self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1) @apgl.skip("") def testSaveParams(self): try: import sklearn except ImportError as error: return svm = LibSVM() svm.setC(10.5) svm.setEpsilon(12.1) svm.setErrorCost(1.8) svm.setSvmType("Epsilon_SVR") svm.setTermination(0.12) svm.setKernel("gaussian", 0.43) outputDir = PathDefaults.getOutputDir() fileName = outputDir + "test/testSvmParams" svm.saveParams(fileName) svm2 = LibSVM() svm2.loadParams(fileName) self.assertEquals(svm.getC(), 10.5) self.assertEquals(svm.getEpsilon(), 12.1) self.assertEqual(svm.getErrorCost(), 1.8) self.assertEqual(svm.getSvmType(), "Epsilon_SVR") self.assertEqual(svm.getTermination(), 0.12) self.assertEqual(svm.getKernel(), "gaussian") self.assertEqual(svm.getKernelParams(), 0.43) def testStr(self): try: import sklearn except ImportError as error: return svm = LibSVM() #logging.debug(svm) def testSetEpsilon(self): """ Test out the parameter for the regressive SVM, vary epsilon and look at number of support 
vectors. """ try: import sklearn except ImportError as error: return svm = LibSVM() svm.setC(10.0) svm.setEpsilon(0.1) svm.setSvmType("Epsilon_SVR") numExamples = 100 numFeatures = 10 X = numpy.random.randn(numExamples, numFeatures) c = numpy.random.randn(numFeatures) y = numpy.dot(X, numpy.array([c]).T).ravel() + numpy.random.randn(100) svm.setEpsilon(1.0) svm.learnModel(X, y) numSV = svm.getModel().support_.shape svm.setEpsilon(0.5) svm.learnModel(X, y) numSV2 = svm.getModel().support_.shape svm.setEpsilon(0.01) svm.learnModel(X, y) numSV3 = svm.getModel().support_.shape #There should be fewer SVs as epsilon increases self.assertTrue(numSV < numSV2) self.assertTrue(numSV2 < numSV3) def testPredict(self): try: import sklearn except ImportError as error: return numExamples = 100 numFeatures = 10 X = numpy.random.randn(numExamples, numFeatures) c = numpy.random.randn(numFeatures) y = numpy.dot(X, numpy.array([c]).T).ravel() y = numpy.array(y > 0, numpy.int32) * 2 - 1 svm = LibSVM() svm.learnModel(X, y) y2, d = svm.predict(X, True) #self.assertTrue((numpy.sign(d) == y2).all()) #@unittest.skip("") def testParallelVfcvRbf(self): folds = 3 idx = Sampling.crossValidation(folds, self.X.shape[0]) svm = self.svm svm.setKernel("gaussian") bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx) tol = 10**-6 bestError = 1 meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0])) for i in range(svm.Cs.shape[0]): C = svm.Cs[i] for j in range(svm.gammas.shape[0]): gamma = svm.gammas[j] error = 0 for trainInds, testInds in idx: trainX = self.X[trainInds, :] trainY = self.y[trainInds] testX = self.X[testInds, :] testY = self.y[testInds] svm.setGamma(gamma) svm.setC(C) svm.learnModel(trainX, trainY) predY = svm.predict(testX) error += Evaluator.binaryError(predY, testY) meanErrors2[i, j] = error / len(idx) if error < bestError: bestC = C bestGamma = gamma bestError = error self.assertEquals(bestC, bestSVM.getC()) self.assertEquals(bestGamma, bestSVM.getGamma()) 
self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol) def testParallelVfcvRbf2(self): #In this test we try SVM regression folds = 3 idx = Sampling.crossValidation(folds, self.X.shape[0]) svm = self.svm svm.setKernel("gaussian") svm.setSvmType("Epsilon_SVR") bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx, type="Epsilon_SVR") tol = 10**-6 bestError = 100 meanErrors2 = numpy.zeros( (svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0])) for i in range(svm.Cs.shape[0]): C = svm.Cs[i] for j in range(svm.gammas.shape[0]): gamma = svm.gammas[j] for k in range(svm.epsilons.shape[0]): epsilon = svm.epsilons[k] error = 0 for trainInds, testInds in idx: trainX = self.X[trainInds, :] trainY = self.y[trainInds] testX = self.X[testInds, :] testY = self.y[testInds] svm.setGamma(gamma) svm.setC(C) svm.setEpsilon(epsilon) svm.learnModel(trainX, trainY) predY = svm.predict(testX) error += svm.getMetricMethod()(predY, testY) meanErrors2[j, k, i] = error / len(idx) if error < bestError: bestC = C bestGamma = gamma bestError = error bestEpsilon = epsilon self.assertEquals(bestC, bestSVM.getC()) self.assertEquals(bestGamma, bestSVM.getGamma()) self.assertEquals(bestEpsilon, bestSVM.getEpsilon()) self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol) def testParallelVfPenRbf(self): folds = 3 Cv = numpy.array([4.0]) idx = Sampling.crossValidation(folds, self.X.shape[0]) svm = self.svm svm.setKernel("gaussian") resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv) tol = 10**-6 bestError = 1 meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0])) for i in range(svm.Cs.shape[0]): C = svm.Cs[i] for j in range(svm.gammas.shape[0]): gamma = svm.gammas[j] penalty = 0 for trainInds, testInds in idx: trainX = self.X[trainInds, :] trainY = self.y[trainInds] svm.setGamma(gamma) svm.setC(C) svm.learnModel(trainX, trainY) predY = svm.predict(self.X) predTrainY = svm.predict(trainX) penalty += Evaluator.binaryError( predY, self.y) - 
Evaluator.binaryError( predTrainY, trainY) penalty = penalty * Cv[0] / len(idx) svm.learnModel(self.X, self.y) predY = svm.predict(self.X) meanErrors2[i, j] = Evaluator.binaryError(predY, self.y) + penalty if meanErrors2[i, j] < bestError: bestC = C bestGamma = gamma bestError = meanErrors2[i, j] bestSVM, trainErrors, currentPenalties = resultsList[0] meanErrors = trainErrors + currentPenalties self.assertEquals(bestC, bestSVM.getC()) self.assertEquals(bestGamma, bestSVM.getGamma()) self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol) #@unittest.skip("") def testParallelVfPenRbf2(self): #Test support vector regression folds = 3 Cv = numpy.array([4.0]) idx = Sampling.crossValidation(folds, self.X.shape[0]) svm = self.svm svm.setKernel("gaussian") svm.setSvmType("Epsilon_SVR") resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv, type="Epsilon_SVR") tol = 10**-6 bestError = 100 meanErrors2 = numpy.zeros( (svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0])) for i in range(svm.Cs.shape[0]): C = svm.Cs[i] for j in range(svm.gammas.shape[0]): gamma = svm.gammas[j] for k in range(svm.epsilons.shape[0]): epsilon = svm.epsilons[k] penalty = 0 for trainInds, testInds in idx: trainX = self.X[trainInds, :] trainY = self.y[trainInds] svm.setGamma(gamma) svm.setC(C) svm.setEpsilon(epsilon) svm.learnModel(trainX, trainY) predY = svm.predict(self.X) predTrainY = svm.predict(trainX) penalty += svm.getMetricMethod()( predY, self.y) - svm.getMetricMethod()(predTrainY, trainY) penalty = penalty * Cv[0] / len(idx) svm.learnModel(self.X, self.y) predY = svm.predict(self.X) meanErrors2[j, k, i] = svm.getMetricMethod()( predY, self.y) + penalty if meanErrors2[j, k, i] < bestError: bestC = C bestGamma = gamma bestEpsilon = epsilon bestError = meanErrors2[j, k, i] bestSVM, trainErrors, currentPenalties = resultsList[0] meanErrors = trainErrors + currentPenalties self.assertEquals(bestC, bestSVM.getC()) self.assertEquals(bestGamma, bestSVM.getGamma()) 
# NOTE(review): the two statements below are the tail of a test method whose
# `def` lies before this chunk; kept verbatim.
        self.assertEquals(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)

    def testGetC(self):
        # setC/getC must round-trip the penalty parameter.
        svm = LibSVM()
        svm.setC(10.0)
        C = svm.getC()
        self.assertTrue(C == 10.0)

    def testGetGamma(self):
        # setKernel stores the kernel parameter retrievable via getKernelParams.
        svm = LibSVM()
        svm.setKernel("gaussian", 12.0)
        gamma = svm.getKernelParams()
        self.assertTrue(gamma == 12.0)

    def testComputeTestError(self):
        # computeTestError on a half/half split must equal a direct train/predict.
        C = 10.0
        gamma = 0.5
        # NOTE(review): float used as a slice bound below — only works on old
        # numpy; presumably should be int(self.X.shape[0] * 0.5). Confirm.
        numTrainExamples = self.X.shape[0] * 0.5
        trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]
        svm = LibSVM('gaussian', gamma, C)
        args = (trainX, trainY, testX, testY, svm)
        error = computeTestError(args)
        # Recompute the same error directly and compare.
        svm = LibSVM('gaussian', gamma, C)
        svm.learnModel(trainX, trainY)
        predY = svm.predict(testX)
        self.assertEquals(Evaluator.binaryError(predY, testY), error)

    def testComputeBootstrapError(self):
        # Smoke test only — no assertion on the returned error.
        C = 10.0
        gamma = 0.5
        numTrainExamples = self.X.shape[0] * 0.5
        trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]
        svm = LibSVM('gaussian', gamma, C)
        args = (trainX, trainY, testX, testY, svm)
        error = computeBootstrapError(args)

    def testComputeIdealPenalty(self):
        # Smoke test: computeIdealPenalty runs on a (X, y, X, y, svm) tuple.
        C = 10.0
        gamma = 0.5
        svm = LibSVM("gaussian", gamma, C)
        args = (self.X, self.y, self.X, self.y, svm)
        error = computeIdealPenalty(args)

    def testParallelPenaltyGridRbf(self):
        # Parallel penalty grid must match a serially computed grid (transposed).
        svm = self.svm
        svm.setKernel("gaussian")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]
        idealPenalties = svm.parallelPenaltyGridRbf(trainX, trainY, self.X, self.y)
        idealPenalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        idealPenalties3 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(self.X)
                predTrainY = svm.predict(trainX)
                # Penalty = generalisation error minus training error.
                penalty = Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)
                idealPenalties2[i, j] = penalty
                args = (trainX, trainY, self.X, self.y, svm)
                idealPenalties3[i, j] = computeIdealPenalty(args)
        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(idealPenalties2.T - idealPenalties) < tol)

    def testParallelPenaltyGridRbf2(self):
        #Test with SVM regression
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]
        idealPenalties = svm.parallelPenaltyGridRbf(trainX, trainY, self.X, self.y, type="Epsilon_SVR")
        # Grid is indexed (gamma, epsilon, C) for the regression case.
        idealPenalties2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty = svm.getMetricMethod()(predY, self.y) - svm.getMetricMethod()(predTrainY, trainY)
                    idealPenalties2[j, k, i] = penalty
        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(idealPenalties2 - idealPenalties) < tol)

    def testParallelModelSelect(self):
        # parallelModelSelect must agree with a serial cross-validation grid.
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        bestSVM, meanErrors = svm.parallelModelSelect(self.X, self.y, idx, paramDict)
        tol = 10**-6
        bestError = 1
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        print("Computing real grid")
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)
                meanErrors2[i, j] = error / len(idx)
                # NOTE(review): compares summed `error` (not the mean) against
                # bestError — works for picking the argmin but mixes scales.
                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error
        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    def testParallelPenaltyGrid2(self):
        #Test with SVM regression
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        paramDict["setEpsilon"] = svm.getEpsilons()
        #print(paramDict.keys())
        idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)
        idealPenalties2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty = svm.getMetricMethod()(predY, self.y) - svm.getMetricMethod()(predTrainY, trainY)
                    idealPenalties2[j, k, i] = penalty
        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(idealPenalties2 - idealPenalties) < tol)

    def testParallelPen(self):
        # parallelPen must reproduce serially computed train errors + penalties.
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        resultsList = svm.parallelPen(self.X, self.y, idx, paramDict, Cv)
        tol = 10**-6
        bestError = 1
        trainErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        penalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty += Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)
                # Scale the summed fold penalties by the penalisation constant.
                penalty = penalty * Cv[0] / len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                trainErrors2[i, j] = Evaluator.binaryError(predY, self.y)
                penalties2[i, j] = penalty
                meanErrors2[i, j] = Evaluator.binaryError(predY, self.y) + penalty
                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]
        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties
        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
        self.assertTrue(numpy.linalg.norm(trainErrors2.T - trainErrors) < tol)
        self.assertTrue(numpy.linalg.norm(penalties2.T - currentPenalties) < tol)

    def testParallelPenaltyGrid(self):
        # Same check as testParallelPenaltyGridRbf but via parallelPenaltyGrid.
        svm = self.svm
        svm.setKernel("gaussian")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)
        idealPenalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        idealPenalties3 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(self.X)
                predTrainY = svm.predict(trainX)
                penalty = Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)
                idealPenalties2[i, j] = penalty
                args = (trainX, trainY, self.X, self.y, svm)
                idealPenalties3[i, j] = computeIdealPenalty(args)
        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(idealPenalties2.T - idealPenalties) < tol)

    def testGetBestLearner(self):
        # With normModelSelect, the chosen C matches the C whose full-data
        # weight norm is closest to the best CV weight norm.
        svm = self.svm
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        errors = numpy.random.rand(svm.getCs().shape[0], svm.getGammas().shape[0])
        folds = 5
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm.normModelSelect = True
        svm.setKernel("gaussian")
        learner = svm.getBestLearner(errors, paramDict, self.X, self.y, idx)
        bestC = learner.getC()
        #Find the best norm
        bestInds = numpy.unravel_index(numpy.argmin(errors), errors.shape)
        learner.setC(svm.getCs()[bestInds[0]])
        learner.setGamma(svm.getGammas()[bestInds[1]])
        norms = []
        for trainInds, testInds in idx:
            validX = self.X[trainInds, :]
            validY = self.y[trainInds]
            learner.learnModel(validX, validY)
            norms.append(learner.weightNorm())
        bestNorm = numpy.array(norms).mean()
        norms = numpy.zeros(paramDict["setC"].shape[0])
        for i, C in enumerate(paramDict["setC"]):
            learner.setC(C)
            learner.learnModel(self.X, self.y)
            norms[i] = learner.weightNorm()
        bestC2 = paramDict["setC"][numpy.abs(norms - bestNorm).argmin()]
        self.assertEquals(bestC, bestC2)
# Script-level setup for SVR model-selection experiments on regression data.
import matplotlib.pyplot as plt

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
numpy.seterr(all="raise")
numpy.random.seed(21)

# Input/output locations for the penalisation experiments.
dataDir = PathDefaults.getDataDir()
dataDir += "modelPenalisation/regression/"
outputDir = PathDefaults.getOutputDir() + "modelPenalisation/regression/SVR/"
figInd = 0

loadMethod = ModelSelectUtils.loadRegressDataset
datasets = ModelSelectUtils.getRegressionDatasets(True)
numProcesses = multiprocessing.cpu_count()

# Epsilon-SVR learner with an RBF kernel, parallelised over all cores.
learner = LibSVM(kernel="rbf", processes=numProcesses, type="Epsilon_SVR")
learner.setChunkSize(3)

# Hyperparameter grids (powers of two).
Cs = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float)
gammas = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float)
epsilons = learner.getEpsilons()

# Fix gamma and epsilon to single grid values for this run.
gammaInd = 3
gamma = gammas[gammaInd]
learner.setGamma(gamma)
epsilonInd = 0
epsilon = epsilons[epsilonInd]
learner.setEpsilon(epsilon)
learner.normModelSelect = True
class LibSVMTest(unittest.TestCase):
    """Tests for the LibSVM wrapper: learning, parameter get/set, parallel
    model selection and penalty-grid computation.

    Fixes relative to the previous version:
      * testLearnModel trained the regression SVM on the binary labels
        (self.y) instead of the freshly generated real-valued targets.
      * testSaveParams asserted on the original svm rather than the reloaded
        svm2, so loadParams was never actually verified.
      * float slice bounds (shape[0]*0.5) wrapped in int() — floats are not
        valid slice indices.
    """

    def setUp(self):
        # Skip all configuration if sklearn is unavailable.
        try:
            import sklearn
        except ImportError as error:
            logging.debug(error)
            return
        numpy.random.seed(21)
        numExamples = 100
        numFeatures = 10
        eg = ExamplesGenerator()
        self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures)
        self.svm = LibSVM()
        # Small hyperparameter grids so the grid-search tests stay fast.
        self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=numpy.float)
        self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=numpy.float)
        self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=numpy.float)
        numpy.set_printoptions(linewidth=150, suppress=True, precision=3)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

    def testLearnModel(self):
        try:
            import sklearn
        except ImportError as error:
            return
        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        y = self.y
        e = Evaluator.binaryError(y, predY)
        #Test for wrong labels
        numExamples = 6
        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float)
        y = numpy.array([[-1], [-1], [-1], [1], [1], [5]])
        self.assertRaises(ValueError, self.svm.learnModel, X, y)
        #Try the regression SVM
        svm = LibSVM(type="Epsilon_SVR")
        y = numpy.random.rand(self.X.shape[0])
        # FIX: train on the real-valued targets y (previously self.y, which
        # left the regression targets generated above unused).
        svm.learnModel(self.X, y)

    def testSetErrorCost(self):
        # A higher error cost on the positive class should reduce the
        # positive-class error rate.
        try:
            import sklearn
        except ImportError as error:
            return
        numExamples = 1000
        numFeatures = 100
        eg = ExamplesGenerator()
        X, y = eg.generateBinaryExamples(numExamples, numFeatures)
        svm = LibSVM()
        C = 0.1
        kernel = "linear"
        kernelParam = 0
        svm.setKernel(kernel, kernelParam)
        svm.setC(C)
        svm.setErrorCost(0.1)
        svm.learnModel(X, y)
        predY = svm.classify(X)
        e1 = Evaluator.binaryErrorP(y, predY)
        svm.setErrorCost(0.9)
        svm.learnModel(X, y)
        predY = svm.classify(X)
        e2 = Evaluator.binaryErrorP(y, predY)
        self.assertTrue(e1 > e2)

    def testClassify(self):
        # Classification error is invariant to permuting the examples.
        try:
            import sklearn
        except ImportError as error:
            return
        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        y = self.y
        e = Evaluator.binaryError(y, predY)
        #Now, permute examples
        perm = numpy.random.permutation(self.X.shape[0])
        predY = self.svm.classify(self.X[perm, :])
        y = y[perm]
        e2 = Evaluator.binaryError(y, predY)
        self.assertEquals(e, e2)

    def testEvaluateCv(self):
        # Cross-validated means/variances must be valid proportions in [0, 1].
        try:
            import sklearn
        except ImportError as error:
            return
        folds = 10
        (means, vars) = self.svm.evaluateCv(self.X, self.y, folds)
        self.assertTrue((means <= 1).all())
        self.assertTrue((means >= 0).all())
        self.assertTrue((vars <= 1).all())
        self.assertTrue((vars >= 0).all())

    @apgl.skip("")
    def testGetModel(self):
        try:
            import sklearn
        except ImportError as error:
            return
        numExamples = 50
        numFeatures = 3
        eg = ExamplesGenerator()
        X, y = eg.generateBinaryExamples(numExamples, numFeatures)
        svm = LibSVM()
        svm.learnModel(X, y)
        weights, b = svm.getWeights()
        #logging.debug(weights)
        #logging.debug(b)

    @apgl.skip("")
    def testGetWeights(self):
        # getWeights() must be consistent with predict()'s decision values.
        try:
            import sklearn
        except ImportError as error:
            return
        numExamples = 6
        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
        #X = numpy.random.rand(numExamples, 10)
        y = numpy.array([[-1], [-1], [-1], [1], [1], [1]])
        svm = LibSVM()
        svm.learnModel(X, y.ravel())
        weights, b = svm.getWeights()
        #Let's see if we can compute the decision values
        y, decisions = svm.predict(X, True)
        decisions2 = numpy.zeros(numExamples)
        decisions2 = numpy.dot(X, weights) - b
        self.assertTrue((decisions == decisions2).all())
        predY = numpy.sign(decisions2)
        self.assertTrue((y.ravel() == predY).all())
        #Do the same test on a random datasets
        numExamples = 50
        numFeatures = 10
        X = numpy.random.rand(numExamples, numFeatures)
        y = numpy.sign(numpy.random.rand(numExamples) - 0.5)
        svm = LibSVM()
        svm.learnModel(X, y.ravel())
        weights, b = svm.getWeights()
        #Let's see if we can compute the decision values
        y, decisions = svm.predict(X, True)
        decisions2 = numpy.dot(X, weights) + b
        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(decisions - decisions2) < tol)
        predY = numpy.sign(decisions2)
        self.assertTrue((y.ravel() == predY).all())

    def testSetTermination(self):
        # Smoke test: learning still works after changing the termination
        # tolerance.
        try:
            import sklearn
        except ImportError as error:
            return
        self.svm.learnModel(self.X, self.y)
        self.svm.setTermination(0.1)
        self.svm.learnModel(self.X, self.y)

    def testSetSvmType(self):
        # Switch between Epsilon_SVR and C_SVC on the same data.
        try:
            import sklearn
        except ImportError as error:
            return
        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        X = Standardiser().standardiseArray(X)
        c = numpy.random.randn(numFeatures)
        y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
        y2 = numpy.array(y > 0, numpy.int32)*2 - 1
        svm = LibSVM()
        svm.setSvmType("Epsilon_SVR")
        self.assertEquals(svm.getType(), "Epsilon_SVR")
        #Try to get a good error
        Cs = 2**numpy.arange(-6, 4, dtype=numpy.float)
        epsilons = 2**numpy.arange(-6, 4, dtype=numpy.float)
        bestError = 10
        for C in Cs:
            for epsilon in epsilons:
                svm.setEpsilon(epsilon)
                svm.setC(C)
                svm.learnModel(X, y)
                yp = svm.predict(X)
                if Evaluator.rootMeanSqError(y, yp) < bestError:
                    bestError = Evaluator.rootMeanSqError(y, yp)
        # The tuned SVR must beat the trivial all-zeros predictor.
        self.assertTrue(bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))
        svm.setSvmType("C_SVC")
        svm.learnModel(X, y2)
        yp2 = svm.predict(X)
        self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)

    @apgl.skip("")
    def testSaveParams(self):
        try:
            import sklearn
        except ImportError as error:
            return
        svm = LibSVM()
        svm.setC(10.5)
        svm.setEpsilon(12.1)
        svm.setErrorCost(1.8)
        svm.setSvmType("Epsilon_SVR")
        svm.setTermination(0.12)
        svm.setKernel("gaussian", 0.43)
        outputDir = PathDefaults.getOutputDir()
        fileName = outputDir + "test/testSvmParams"
        svm.saveParams(fileName)
        svm2 = LibSVM()
        svm2.loadParams(fileName)
        # FIX: assert on the reloaded svm2, not the original svm — otherwise
        # loadParams is never actually exercised.
        self.assertEquals(svm2.getC(), 10.5)
        self.assertEquals(svm2.getEpsilon(), 12.1)
        self.assertEqual(svm2.getErrorCost(), 1.8)
        self.assertEqual(svm2.getSvmType(), "Epsilon_SVR")
        self.assertEqual(svm2.getTermination(), 0.12)
        self.assertEqual(svm2.getKernel(), "gaussian")
        self.assertEqual(svm2.getKernelParams(), 0.43)

    def testStr(self):
        try:
            import sklearn
        except ImportError as error:
            return
        svm = LibSVM()
        #logging.debug(svm)

    def testSetEpsilon(self):
        """
        Test out the parameter for the regressive SVM, vary epsilon and look
        at number of support vectors.
        """
        try:
            import sklearn
        except ImportError as error:
            return
        svm = LibSVM()
        svm.setC(10.0)
        svm.setEpsilon(0.1)
        svm.setSvmType("Epsilon_SVR")
        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)
        y = numpy.dot(X, numpy.array([c]).T).ravel() + numpy.random.randn(100)
        svm.setEpsilon(1.0)
        svm.learnModel(X, y)
        numSV = svm.getModel().support_.shape
        svm.setEpsilon(0.5)
        svm.learnModel(X, y)
        numSV2 = svm.getModel().support_.shape
        svm.setEpsilon(0.01)
        svm.learnModel(X, y)
        numSV3 = svm.getModel().support_.shape
        #There should be fewer SVs as epsilon increases
        self.assertTrue(numSV < numSV2)
        self.assertTrue(numSV2 < numSV3)

    def testPredict(self):
        try:
            import sklearn
        except ImportError as error:
            return
        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)
        y = numpy.dot(X, numpy.array([c]).T).ravel()
        y = numpy.array(y > 0, numpy.int32)*2 - 1
        svm = LibSVM()
        svm.learnModel(X, y)
        y2, d = svm.predict(X, True)
        #self.assertTrue((numpy.sign(d) == y2).all())

    #@unittest.skip("")
    def testParallelVfcvRbf(self):
        # parallelVfcvRbf must agree with a serial CV grid search.
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx)
        tol = 10**-6
        bestError = 1
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)
                meanErrors2[i, j] = error/len(idx)
                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error
        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    def testParallelVfcvRbf2(self):
        #In this test we try SVM regression
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx, type="Epsilon_SVR")
        tol = 10**-6
        bestError = 100
        meanErrors2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    error = 0
                    for trainInds, testInds in idx:
                        trainX = self.X[trainInds, :]
                        trainY = self.y[trainInds]
                        testX = self.X[testInds, :]
                        testY = self.y[testInds]
                        svm.setGamma(gamma)
                        svm.setC(C)
                        svm.setEpsilon(epsilon)
                        svm.learnModel(trainX, trainY)
                        predY = svm.predict(testX)
                        error += svm.getMetricMethod()(predY, testY)
                    meanErrors2[j, k, i] = error/len(idx)
                    if error < bestError:
                        bestC = C
                        bestGamma = gamma
                        bestError = error
                        bestEpsilon = epsilon
        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertEquals(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)

    def testParallelVfPenRbf(self):
        # parallelVfPenRbf must reproduce the serially computed penalised errors.
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv)
        tol = 10**-6
        bestError = 1
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty += Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)
                penalty = penalty*Cv[0]/len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                meanErrors2[i, j] = Evaluator.binaryError(predY, self.y) + penalty
                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]
        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties
        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    #@unittest.skip("")
    def testParallelVfPenRbf2(self):
        #Test support vector regression
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv, type="Epsilon_SVR")
        tol = 10**-6
        bestError = 100
        meanErrors2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    penalty = 0
                    for trainInds, testInds in idx:
                        trainX = self.X[trainInds, :]
                        trainY = self.y[trainInds]
                        svm.setGamma(gamma)
                        svm.setC(C)
                        svm.setEpsilon(epsilon)
                        svm.learnModel(trainX, trainY)
                        predY = svm.predict(self.X)
                        predTrainY = svm.predict(trainX)
                        penalty += svm.getMetricMethod()(predY, self.y) - svm.getMetricMethod()(predTrainY, trainY)
                    penalty = penalty*Cv[0]/len(idx)
                    svm.learnModel(self.X, self.y)
                    predY = svm.predict(self.X)
                    meanErrors2[j, k, i] = svm.getMetricMethod()(predY, self.y) + penalty
                    if meanErrors2[j, k, i] < bestError:
                        bestC = C
                        bestGamma = gamma
                        bestEpsilon = epsilon
                        bestError = meanErrors2[j, k, i]
        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties
        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertEquals(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)

    def testGetC(self):
        svm = LibSVM()
        svm.setC(10.0)
        C = svm.getC()
        self.assertTrue(C == 10.0)

    def testGetGamma(self):
        svm = LibSVM()
        svm.setKernel("gaussian", 12.0)
        gamma = svm.getKernelParams()
        self.assertTrue(gamma == 12.0)

    def testComputeTestError(self):
        C = 10.0
        gamma = 0.5
        # FIX: slice bounds must be integers.
        numTrainExamples = int(self.X.shape[0]*0.5)
        trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]
        svm = LibSVM('gaussian', gamma, C)
        args = (trainX, trainY, testX, testY, svm)
        error = computeTestError(args)
        svm = LibSVM('gaussian', gamma, C)
        svm.learnModel(trainX, trainY)
        predY = svm.predict(testX)
        self.assertEquals(Evaluator.binaryError(predY, testY), error)

    def testComputeBootstrapError(self):
        C = 10.0
        gamma = 0.5
        # FIX: slice bounds must be integers.
        numTrainExamples = int(self.X.shape[0]*0.5)
        trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]
        svm = LibSVM('gaussian', gamma, C)
        args = (trainX, trainY, testX, testY, svm)
        error = computeBootstrapError(args)

    def testComputeIdealPenalty(self):
        C = 10.0
        gamma = 0.5
        svm = LibSVM("gaussian", gamma, C)
        args = (self.X, self.y, self.X, self.y, svm)
        error = computeIdealPenalty(args)

    def testParallelPenaltyGridRbf(self):
        svm = self.svm
        svm.setKernel("gaussian")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]
        idealPenalties = svm.parallelPenaltyGridRbf(trainX, trainY, self.X, self.y)
        idealPenalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        idealPenalties3 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(self.X)
                predTrainY = svm.predict(trainX)
                penalty = Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)
                idealPenalties2[i, j] = penalty
                args = (trainX, trainY, self.X, self.y, svm)
                idealPenalties3[i, j] = computeIdealPenalty(args)
        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(idealPenalties2.T - idealPenalties) < tol)

    def testParallelPenaltyGridRbf2(self):
        #Test with SVM regression
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]
        idealPenalties = svm.parallelPenaltyGridRbf(trainX, trainY, self.X, self.y, type="Epsilon_SVR")
        idealPenalties2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty = svm.getMetricMethod()(predY, self.y) - svm.getMetricMethod()(predTrainY, trainY)
                    idealPenalties2[j, k, i] = penalty
        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(idealPenalties2 - idealPenalties) < tol)

    def testParallelModelSelect(self):
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        bestSVM, meanErrors = svm.parallelModelSelect(self.X, self.y, idx, paramDict)
        tol = 10**-6
        bestError = 1
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        print("Computing real grid")
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)
                meanErrors2[i, j] = error/len(idx)
                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error
        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    def testParallelPenaltyGrid2(self):
        #Test with SVM regression
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        paramDict["setEpsilon"] = svm.getEpsilons()
        #print(paramDict.keys())
        idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)
        idealPenalties2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty = svm.getMetricMethod()(predY, self.y) - svm.getMetricMethod()(predTrainY, trainY)
                    idealPenalties2[j, k, i] = penalty
        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(idealPenalties2 - idealPenalties) < tol)

    def testParallelPen(self):
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        resultsList = svm.parallelPen(self.X, self.y, idx, paramDict, Cv)
        tol = 10**-6
        bestError = 1
        trainErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        penalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty += Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)
                penalty = penalty*Cv[0]/len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                trainErrors2[i, j] = Evaluator.binaryError(predY, self.y)
                penalties2[i, j] = penalty
                meanErrors2[i, j] = Evaluator.binaryError(predY, self.y) + penalty
                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]
        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties
        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
        self.assertTrue(numpy.linalg.norm(trainErrors2.T - trainErrors) < tol)
        self.assertTrue(numpy.linalg.norm(penalties2.T - currentPenalties) < tol)

    def testParallelPenaltyGrid(self):
        svm = self.svm
        svm.setKernel("gaussian")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)
        idealPenalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        idealPenalties3 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(self.X)
                predTrainY = svm.predict(trainX)
                penalty = Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)
                idealPenalties2[i, j] = penalty
                args = (trainX, trainY, self.X, self.y, svm)
                idealPenalties3[i, j] = computeIdealPenalty(args)
        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(idealPenalties2.T - idealPenalties) < tol)

    def testGetBestLearner(self):
        svm = self.svm
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        errors = numpy.random.rand(svm.getCs().shape[0], svm.getGammas().shape[0])
        folds = 5
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm.normModelSelect = True
        svm.setKernel("gaussian")
        learner = svm.getBestLearner(errors, paramDict, self.X, self.y, idx)
        bestC = learner.getC()
        #Find the best norm
        bestInds = numpy.unravel_index(numpy.argmin(errors), errors.shape)
        learner.setC(svm.getCs()[bestInds[0]])
        learner.setGamma(svm.getGammas()[bestInds[1]])
        norms = []
        for trainInds, testInds in idx:
            validX = self.X[trainInds, :]
            validY = self.y[trainInds]
            learner.learnModel(validX, validY)
            norms.append(learner.weightNorm())
        bestNorm = numpy.array(norms).mean()
        norms = numpy.zeros(paramDict["setC"].shape[0])
        for i, C in enumerate(paramDict["setC"]):
            learner.setC(C)
            learner.learnModel(self.X, self.y)
            norms[i] = learner.weightNorm()
        bestC2 = paramDict["setC"][numpy.abs(norms - bestNorm).argmin()]
        self.assertEquals(bestC, bestC2)
def testGetC(self):
    """setC followed by getC must round-trip the penalty parameter."""
    classifier = LibSVM()
    classifier.setC(10.0)
    self.assertTrue(classifier.getC() == 10.0)
def runToyExp(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, numProcesses, fileNameSuffix):
    """
    Run the model-penalisation experiments on the toy datasets, comparing
    ideal penalties, V-fold cross validation and penalised model selection,
    and save the resulting error/parameter grids to .npz files.

    datasetNames: list of (name, numRealisations) pairs.
    sampleSizes: training sample sizes to try.
    foldsSet: numbers of CV folds to try.
    cvScalings: scalings of the CV penalty (each also scaled by folds-1).
    sampleMethods: list of (label, samplingFunction) pairs.
    numProcesses: worker processes for the parallel SVM routines.
    fileNameSuffix: appended to each output file name.
    """
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
    outputDir = PathDefaults.getOutputDir() + "modelPenalisation/"
    svm = LibSVM()
    numCs = svm.getCs().shape[0]
    numGammas = svm.getGammas().shape[0]
    # 1 CV method + 1 BIC penalisation + one method per CV scaling.
    numMethods = 1 + (1 + cvScalings.shape[0])
    numParams = 2
    runIdeal = True
    runCv = True
    runVfpen = True
    for i in range(len(datasetNames)):
        datasetName = datasetNames[i][0]
        numRealisations = datasetNames[i][1]
        logging.debug("Learning using dataset " + datasetName)
        for s in range(len(sampleMethods)):
            sampleMethod = sampleMethods[s][1]
            outfileName = outputDir + datasetName + sampleMethods[s][0] + fileNameSuffix
            # File lock guards against concurrent runs recomputing the same
            # results.
            fileLock = FileLock(outfileName + ".npz")
            if not fileLock.isLocked() and not fileLock.fileExists():
                fileLock.lock()
                errors = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods))
                params = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods, numParams))
                errorGrids = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods, numCs, numGammas))
                approxGrids = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods, numCs, numGammas))
                idealGrids = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numCs, numGammas))
                data = numpy.load(dataDir + datasetName + ".npz")
                gridPoints, trainX, trainY, pdfX, pdfY1X, pdfYminus1X = (data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"])
                # We form a test set from the grid points
                testX = numpy.zeros((gridPoints.shape[0] ** 2, 2))
                for m in range(gridPoints.shape[0]):
                    testX[m * gridPoints.shape[0]:(m + 1) * gridPoints.shape[0], 0] = gridPoints
                    testX[m * gridPoints.shape[0]:(m + 1) * gridPoints.shape[0], 1] = gridPoints[m]
                for j in range(numRealisations):
                    Util.printIteration(j, 1, numRealisations, "Realisation: ")
                    for k in range(sampleSizes.shape[0]):
                        sampleSize = sampleSizes[k]
                        for m in range(foldsSet.shape[0]):
                            folds = foldsSet[m]
                            logging.debug("Using sample size " + str(sampleSize) + " and " + str(folds) + " folds")
                            # Fresh random training subsample per realisation.
                            perm = numpy.random.permutation(trainX.shape[0])
                            trainInds = perm[0:sampleSize]
                            validX = trainX[trainInds, :]
                            validY = trainY[trainInds]
                            svm = LibSVM(processes=numProcesses)
                            # Find ideal penalties
                            if runIdeal:
                                logging.debug("Finding ideal grid of penalties")
                                idealGrids[j, k, m, :, :] = parallelPenaltyGridRbf(svm, validX, validY, testX, gridPoints, pdfX, pdfY1X, pdfYminus1X)
                            # Cross validation
                            if runCv:
                                logging.debug("Running V-fold cross validation")
                                methodInd = 0
                                idx = sampleMethod(folds, validY.shape[0])
                                if sampleMethod == Sampling.bootstrap:
                                    bootstrap = True
                                else:
                                    bootstrap = False
                                bestSVM, cvGrid = svm.parallelVfcvRbf(validX, validY, idx, True, bootstrap)
                                predY, decisionsY = bestSVM.predict(testX, True)
                                decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
                                errors[j, k, m, methodInd] = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)
                                params[j, k, m, methodInd, :] = numpy.array([bestSVM.getC(), bestSVM.getKernelParams()])
                                errorGrids[j, k, m, methodInd, :, :] = cvGrid
                            # v fold penalisation
                            if runVfpen:
                                logging.debug("Running penalisation")
                                # BIC penalisation
                                Cv = float((folds - 1) * numpy.log(validX.shape[0]) / 2)
                                tempCvScalings = cvScalings * (folds - 1)
                                tempCvScalings = numpy.insert(tempCvScalings, 0, Cv)
                                # Use cross validation
                                idx = sampleMethod(folds, validY.shape[0])
                                svmGridResults = svm.parallelVfPenRbf(validX, validY, idx, tempCvScalings)
                                for n in range(len(tempCvScalings)):
                                    bestSVM, trainErrors, approxGrid = svmGridResults[n]
                                    # Method 0 is CV; penalisation methods start at 1.
                                    methodInd = n + 1
                                    predY, decisionsY = bestSVM.predict(testX, True)
                                    decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
                                    errors[j, k, m, methodInd] = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)
                                    params[j, k, m, methodInd, :] = numpy.array([bestSVM.getC(), bestSVM.getKernelParams()])
                                    errorGrids[j, k, m, methodInd, :, :] = trainErrors + approxGrid
                                    approxGrids[j, k, m, methodInd, :, :] = approxGrid
                # Aggregate over realisations and persist everything.
                meanErrors = numpy.mean(errors, 0)
                print(meanErrors)
                meanParams = numpy.mean(params, 0)
                print(meanParams)
                meanErrorGrids = numpy.mean(errorGrids, 0)
                stdErrorGrids = numpy.std(errorGrids, 0)
                meanIdealGrids = numpy.mean(idealGrids, 0)
                stdIdealGrids = numpy.std(idealGrids, 0)
                meanApproxGrids = numpy.mean(approxGrids, 0)
                stdApproxGrids = numpy.std(approxGrids, 0)
                numpy.savez(outfileName, errors, params, meanErrorGrids, stdErrorGrids, meanIdealGrids, stdIdealGrids, meanApproxGrids, stdApproxGrids)
                logging.debug("Saved results as file " + outfileName + ".npz")
                fileLock.unlock()
            else:
                logging.debug("Results already computed")
    logging.debug("All done!")
def testGetGamma(self):
    """The kernel width passed to setKernel must be returned by getKernelParams."""
    classifier = LibSVM()
    classifier.setKernel("gaussian", 12.0)
    self.assertTrue(classifier.getKernelParams() == 12.0)
from apgl.util import Util

# Reproducible runs: log everything, fail fast on numerical errors, fix the seed.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
numpy.seterr(all="raise")
numpy.random.seed(21)

# Input/output locations for the SVR model-penalisation experiment.
dataDir = PathDefaults.getDataDir()
dataDir += "modelPenalisation/regression/"
outputDir = PathDefaults.getOutputDir() + "modelPenalisation/regression/SVR/"
figInd = 0

loadMethod = ModelSelectUtils.loadRegressDataset
datasets = ModelSelectUtils.getRegressionDatasets(True)
numProcesses = multiprocessing.cpu_count()

# Epsilon-SVR with an RBF kernel, parallelised over all available cores.
learner = LibSVM(kernel="rbf", processes=numProcesses, type="Epsilon_SVR")
learner.setChunkSize(3)

# Log-spaced hyperparameter grids: C in 2^{-10..12}, gamma in 2^{-10..2}.
# FIX: numpy.float is a deprecated alias removed in NumPy 1.24 -- use
# the explicit numpy.float64 dtype, which is what the alias meant.
Cs = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float64)
gammas = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float64)
epsilons = learner.getEpsilons()
numCs = Cs.shape[0]
numGammas = gammas.shape[0]

# Grid passed to the model-selection machinery; keys name learner setters.
paramDict = {}
paramDict["setC"] = Cs
paramDict["setGamma"] = gammas
paramDict["setEpsilon"] = epsilons

print(learner)
def testGetWeights(self):
    """
    The hyperplane (weights, bias) returned by getWeights must reproduce the
    SVM decision values and hence the predictions, first on a tiny hand-made
    dataset and then on a random one.
    """
    try:
        import sklearn
    except ImportError as error:
        return

    # A tiny, linearly separable 1-d problem centred on the origin.
    sampleCount = 6
    X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
    y = numpy.array([[-1], [-1], [-1], [1], [1], [1]])

    classifier = LibSVM()
    classifier.learnModel(X, y.ravel())
    weights, b = classifier.getWeights()

    # Recompute the decision values directly from the returned hyperplane.
    y, decisions = classifier.predict(X, True)
    recomputed = numpy.dot(X, weights) - b
    self.assertTrue((decisions == recomputed).all())
    self.assertTrue((y.ravel() == numpy.sign(recomputed)).all())

    # The same check on a random dataset, this time up to a small tolerance.
    sampleCount = 50
    featureCount = 10
    X = numpy.random.rand(sampleCount, featureCount)
    y = numpy.sign(numpy.random.rand(sampleCount) - 0.5)

    classifier = LibSVM()
    classifier.learnModel(X, y.ravel())
    weights, b = classifier.getWeights()

    y, decisions = classifier.predict(X, True)
    recomputed = numpy.dot(X, weights) + b
    self.assertTrue(numpy.linalg.norm(decisions - recomputed) < 10**-6)
    self.assertTrue((y.ravel() == numpy.sign(recomputed)).all())
import logging
import numpy
import os

# Pick the tenth regression benchmark dataset and load its first (j=0) split.
datasets = ModelSelectUtils.getRegressionDatasets()
numProcesses = 8
dataDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
datasetName = datasets[9]
print(datasetName)
j = 0
trainX, trainY, testX, testY = ModelSelectUtils.loadRegressDataset(dataDir, datasetName, j)

# Epsilon-SVR with a Gaussian kernel and its model-selection parameter grid.
learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses)
paramDict = {}
# FIX: numpy.float is a deprecated alias removed in NumPy 1.24 -- use the
# explicit numpy.float64 dtype, which is what the alias meant.
paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float64)
paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float64)
paramDict["setEpsilon"] = learner.getEpsilons()

# Even fold counts from 2 to 30, a single penalty scaling, and CV sampling.
foldsSet = numpy.arange(2, 31, 2)
Cvs = numpy.array([1.0])
sampleMethod = Sampling.crossValidation

# Work with a random subsample of 100 training examples.
sampleSize = 100
trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
validX = trainX[trainInds, :]
validY = trainY[trainInds]
class SvmEgoSimulator(AbstractDiffusionSimulator):
    """
    A class which combines SVM classification with the EgoSimulation. There are methods
    to run modelSelection, train the SVM and then run the simulation. The simulation itself
    is run using EgoSimulator.
    """
    def __init__(self, examplesFileName):
        """
        Create the class by reading examples from a Matlab file. Instantiate the SVM and
        create a preprocesor to standarise examples to have zero mean and unit variance.
        """
        # Load the examples and name the data/label fields used throughout.
        self.examplesList = ExamplesList.readFromFile(examplesFileName)
        self.examplesList.setDefaultExamplesName("X")
        self.examplesList.setLabelsName("y")
        # Log the label distribution; the base error rate is the frequency of
        # the rarest label over the total number of examples.
        (freqs, items) = Util.histogram(self.examplesList.getSampledDataField("y").ravel())
        logging.info("Distribution of labels: " + str((freqs, items)))
        logging.info("The base error rate is " + str(float(min(freqs))/self.examplesList.getNumExamples()))
        self.classifier = LibSVM()
        self.errorMethod = Evaluator.balancedError
        # Standardise the example matrix in place (zero mean, unit variance)
        # and write it back so all later sampling sees preprocessed data.
        self.preprocessor = Standardiser()
        X = self.preprocessor.standardiseArray(self.examplesList.getDataField(self.examplesList.getDefaultExamplesName()))
        self.examplesList.overwriteDataField(self.examplesList.getDefaultExamplesName(), X)

    def getPreprocessor(self):
        """
        Returns the preprocessor
        """
        return self.preprocessor

    def sampleExamples(self, sampleSize):
        """
        This function exists so that we can sample the same examples used in model selection and exclude
        them when running evaluateClassifier.
        """
        self.examplesList.randomSubData(sampleSize)

    def modelSelection(self, Cs, kernel, kernelParams, errorCosts, folds, sampleSize):
        """
        Perform model selection using an SVM
        """
        # Validate all hyperparameter lists and sizes before doing any work.
        Parameter.checkInt(sampleSize, 0, self.examplesList.getNumExamples())
        Parameter.checkInt(folds, 0, sampleSize)
        Parameter.checkString(kernel, ["linear", "polynomial", "gaussian"])
        Parameter.checkList(Cs, Parameter.checkFloat, [0.0, float("inf")])
        Parameter.checkList(errorCosts, Parameter.checkFloat, [0.0, float("inf")])

        #Perform model selection
        self.examplesList.randomSubData(sampleSize)
        (freqs, items) = Util.histogram(self.examplesList.getSampledDataField("y").ravel())
        logging.info("Using " + str(sampleSize) + " examples for model selection")
        logging.info("Distribution of labels: " + str((freqs, items)))
        logging.info("List of Cs " + str(Cs))
        logging.info("List of kernels " + str(kernel))
        logging.info("List of kernelParams " + str(kernelParams))
        logging.info("List of errorCosts " + str(errorCosts))

        # Cross-validated grid search over (C, kernelParam, errorCost);
        # returns the best combination and its error under self.errorMethod.
        CVal, kernelParamVal, errorCost, error = self.classifier.cvModelSelection(self.examplesList, Cs, kernelParams, kernel, folds, errorCosts, self.errorMethod)
        logging.info("Model selection returned C = " + str(CVal) + " kernelParam = " + str(kernelParamVal) + " errorCost = " + str(errorCost) + " with error " + str(error))

        return CVal, kernelParamVal, errorCost, error

    def evaluateClassifier(self, CVal, kernel, kernelParamVal, errorCost, folds, sampleSize, invert=True):
        """
        Evaluate the SVM with the given parameters. Often model selection is done before this step
        and in that case, invert=True uses a sample excluding those used for model selection.
        """
        Parameter.checkFloat(CVal, 0.0, float('inf'))
        Parameter.checkFloat(errorCost, 0.0, float('inf'))
        Parameter.checkString(kernel, ["linear", "polynomial", "gaussian"])
        # The kernel parameter is a float width for gaussian but an integer
        # degree (>= 2) for polynomial kernels.
        if kernel == "gaussian":
            Parameter.checkFloat(kernelParamVal, 0.0, float('inf'))
        elif kernel == "polynomial":
            Parameter.checkInt(kernelParamVal, 2, float('inf'))
        Parameter.checkInt(folds, 0, sampleSize)
        Parameter.checkInt(sampleSize, 0, self.examplesList.getNumExamples())

        if invert:
            # Evaluate on examples NOT used for model selection: take the
            # complement of the current permutation indices, then subsample.
            allIndices = numpy.array(list(range(0, self.examplesList.getNumExamples())))
            testIndices = numpy.setdiff1d(allIndices, self.examplesList.getPermutationIndices())
            testIndices = numpy.random.permutation(testIndices)[0:sampleSize]
        else:
            testIndices = Util.sampleWithoutReplacement(sampleSize, self.examplesList.getNumExamples())

        logging.info("Using " + str(testIndices.shape[0]) + " examples for SVM evaluation")
        self.examplesList.setPermutationIndices(testIndices)
        self.classifier.setParams(C=CVal, kernel=kernel, kernelParam=kernelParamVal)
        self.classifier.setErrorCost(errorCost)

        # Cross-validated evaluation: means/vars are per-metric arrays in the
        # order logged below (error, sensitivity, specificity, ...).
        (means, vars) = self.classifier.evaluateCv(self.examplesList, folds)

        logging.info("--- Classification evaluation ---")
        logging.info("Error on " + str(testIndices.shape[0]) + " examples is " + str(means[0]) + "(" + str(vars[0]) + ")")
        logging.info("Sensitivity (recall = TP/(TP+FN)): " + str(means[1]) + "(" + str(vars[1]) + ")")
        logging.info("Specificity (TN/TN+FP): " + str(means[2]) + "(" + str(vars[2]) + ")")
        logging.info("Error on positives: " + str(means[3]) + "(" + str(vars[3]) + ")")
        logging.info("Error on negatives: " + str(means[4]) + "(" + str(vars[4]) + ")")
        logging.info("Balanced error: " + str(means[5]) + "(" + str(vars[5]) + ")")

        return (means, vars)

    def trainClassifier(self, CVal, kernel, kernelParamVal, errorCost, sampleSize):
        """
        Train the SVM on a random subsample of the examples with the given
        parameters and return the trained classifier.
        """
        Parameter.checkFloat(CVal, 0.0, float('inf'))
        Parameter.checkString(kernel, ["linear", "gaussian", "polynomial"])
        Parameter.checkFloat(kernelParamVal, 0.0, float('inf'))
        Parameter.checkFloat(errorCost, 0.0, float('inf'))
        Parameter.checkInt(sampleSize, 0, self.examplesList.getNumExamples())
        logging.info("Training SVM with C=" + str(CVal) + ", " + kernel + " kernel" + ", param=" + str(kernelParamVal) + ", sampleSize=" + str(sampleSize) + ", errorCost=" + str(errorCost))

        # Resample the training set, configure the classifier, and fit.
        self.examplesList.randomSubData(sampleSize)
        self.classifier.setC(C=CVal)
        self.classifier.setKernel(kernel=kernel, kernelParam=kernelParamVal)
        self.classifier.setErrorCost(errorCost)

        X = self.examplesList.getSampledDataField(self.examplesList.getDefaultExamplesName())
        y = self.examplesList.getSampledDataField(self.examplesList.getLabelsName())
        y = y.ravel()

        self.classifier.learnModel(X, y)

        return self.classifier

    def getWeights(self):
        # Delegate to the underlying SVM's hyperplane accessor.
        return self.classifier.getWeights()

    def runSimulation(self, maxIterations):
        """
        Run the diffusion simulation for maxIterations steps, logging the
        total information and alter demographics at each iteration.
        NOTE(review): relies on self.graph and self.egoQuestionIds, which are
        not assigned anywhere in this class as shown -- presumably set by a
        subclass or by external code before this is called; verify.
        """
        Parameter.checkInt(maxIterations, 1, float('inf'))

        #Notice that the data is preprocessed in the same way as the survey data
        egoSimulator = EgoSimulator(self.graph, self.classifier, self.preprocessor)

        totalInfo = numpy.zeros(maxIterations+1)
        totalInfo[0] = EgoUtils.getTotalInformation(self.graph)
        logging.info("Total number of people with information: " + str(totalInfo[0]))

        logging.info("--- Simulation Started ---")

        for i in range(0, maxIterations):
            logging.info("--- Iteration " + str(i) + " ---")

            self.graph = egoSimulator.advanceGraph()
            totalInfo[i+1] = EgoUtils.getTotalInformation(self.graph)
            logging.info("Total number of people with information: " + str(totalInfo[i+1]))

            #Compute distribution of ages etc. in alters
            alterIndices = egoSimulator.getAlters(i)
            alterAges = numpy.zeros(len(alterIndices))
            alterGenders = numpy.zeros(len(alterIndices))

            for j in range(0, len(alterIndices)):
                currentVertex = self.graph.getVertex(alterIndices[j])
                # ("Q5X", 0) / ("Q4", 0) look like survey question ids for
                # age and gender respectively -- TODO confirm against the
                # questionnaire definition.
                alterAges[j] = currentVertex[self.egoQuestionIds.index(("Q5X", 0))]
                alterGenders[j] = currentVertex[self.egoQuestionIds.index(("Q4", 0))]

            (freqs, items) = Util.histogram(alterAges)
            logging.info("Distribution of ages " + str(freqs) + " " + str(items))
            (freqs, items) = Util.histogram(alterGenders)
            logging.info("Distribution of genders " + str(freqs) + " " + str(items))

        logging.info("--- Simulation Finished ---")

        return totalInfo, egoSimulator.getTransmissions()

    def getVertexFeatureDistribution(self, fIndex, vIndices=None):
        return self.graph.getVertexFeatureDistribution(fIndex, vIndices)

    def getPreProcessor(self):
        # NOTE(review): duplicate of getPreprocessor above (case differs only).
        return self.preprocessor

    def getClassifier(self):
        return self.classifier

    # Class-level defaults, overwritten per instance in __init__ (graph and
    # edgeWeight are not set in __init__ as shown -- see runSimulation note).
    preprocessor = None
    examplesList = None
    classifier = None
    graph = None
    edgeWeight = 1