Exemplo n.º 1
0
    def testSaveParams(self):
        """
        Check that parameters written by saveParams are restored by loadParams.

        A LibSVM is configured with non-default values and saved to a file
        below the output directory. A second, freshly constructed LibSVM then
        loads that file, and the assertions are made on this loaded instance
        so that the save/load round trip is what is actually verified.
        """
        try:
            import sklearn
        except ImportError:
            # sklearn cannot be imported: the test body is not run.
            return

        svm = LibSVM()
        svm.setC(10.5)
        svm.setEpsilon(12.1)
        svm.setErrorCost(1.8)
        svm.setSvmType("Epsilon_SVR")
        svm.setTermination(0.12)
        svm.setKernel("gaussian", 0.43)

        outputDir = PathDefaults.getOutputDir()
        fileName = outputDir + "test/testSvmParams"
        svm.saveParams(fileName)

        svm2 = LibSVM()
        svm2.loadParams(fileName)

        # Assert on svm2 (the loaded copy). Checking svm would only confirm
        # the setters above and would pass even if loadParams did nothing.
        self.assertEqual(svm2.getC(), 10.5)
        self.assertEqual(svm2.getEpsilon(), 12.1)
        self.assertEqual(svm2.getErrorCost(), 1.8)
        self.assertEqual(svm2.getSvmType(), "Epsilon_SVR")
        self.assertEqual(svm2.getTermination(), 0.12)
        self.assertEqual(svm2.getKernel(), "gaussian")
        self.assertEqual(svm2.getKernelParams(), 0.43)
Exemplo n.º 2
0
    def testSaveParams(self):
        """
        Check that parameters written by saveParams are restored by loadParams.

        A LibSVM is configured with non-default values and saved to a file
        below the output directory. A second, freshly constructed LibSVM then
        loads that file, and the assertions are made on this loaded instance
        so that the save/load round trip is what is actually verified.
        """
        try:
            import sklearn
        except ImportError:
            # sklearn cannot be imported: the test body is not run.
            return

        svm = LibSVM()
        svm.setC(10.5)
        svm.setEpsilon(12.1)
        svm.setErrorCost(1.8)
        svm.setSvmType("Epsilon_SVR")
        svm.setTermination(0.12)
        svm.setKernel("gaussian", 0.43)

        outputDir = PathDefaults.getOutputDir()
        fileName = outputDir + "test/testSvmParams"
        svm.saveParams(fileName)

        svm2 = LibSVM()
        svm2.loadParams(fileName)

        # Assert on svm2 (the loaded copy). Checking svm would only confirm
        # the setters above and would pass even if loadParams did nothing.
        self.assertEqual(svm2.getC(), 10.5)
        self.assertEqual(svm2.getEpsilon(), 12.1)
        self.assertEqual(svm2.getErrorCost(), 1.8)
        self.assertEqual(svm2.getSvmType(), "Epsilon_SVR")
        self.assertEqual(svm2.getTermination(), 0.12)
        self.assertEqual(svm2.getKernel(), "gaussian")
        self.assertEqual(svm2.getKernelParams(), 0.43)
Exemplo n.º 3
0
class LibSVMTest(unittest.TestCase):
    def setUp(self):
        """
        Build the fixture shared by the tests of this class.

        Seeds numpy's random generator, generates a binary data set of 100
        examples with 10 features (self.X, self.y) and creates a LibSVM
        (self.svm) whose Cs, gammas and epsilons grids are powers of two.

        If sklearn cannot be imported the test is reported as skipped. A
        plain return would leave self.X and self.svm unset, and the tests
        that do not guard the import themselves would then fail with an
        AttributeError.
        """
        try:
            import sklearn
        except ImportError as error:
            logging.debug(error)
            self.skipTest("sklearn could not be imported: " + str(error))

        numpy.random.seed(21)
        numExamples = 100
        numFeatures = 10
        eg = ExamplesGenerator()

        self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures)
        self.svm = LibSVM()
        # numpy.float64 replaces the numpy.float alias, which no longer
        # exists in current NumPy releases.
        self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=numpy.float64)
        self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=numpy.float64)
        self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=numpy.float64)

        numpy.set_printoptions(linewidth=150, suppress=True, precision=3)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

    def testLearnModel(self):
        """
        Exercise learnModel for classification, bad labels and regression.

        1. Train and classify on the fixture data and compute the binary
           error (only checks that the calls succeed).
        2. Expect a ValueError when the label set contains the value 5.
        3. Train an "Epsilon_SVR" LibSVM on random real-valued targets.
        """
        try:
            import sklearn
        except ImportError:
            return

        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        Evaluator.binaryError(self.y, predY)

        #Test for wrong labels
        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
        y = numpy.array([[-1], [-1], [-1], [1], [1], [5]])

        self.assertRaises(ValueError, self.svm.learnModel, X, y)

        #Try the regression SVM
        svm = LibSVM(type="Epsilon_SVR")
        y = numpy.random.rand(self.X.shape[0])
        # Train on the random real-valued targets generated above; the
        # previous code passed self.y and never used them.
        svm.learnModel(self.X, y)

    def testSetErrorCost(self):
        """
        Train a linear LibSVM (C = 0.1) with error costs 0.1 and 0.9 on the
        same generated data and assert that Evaluator.binaryErrorP is larger
        for the cost of 0.1 than for the cost of 0.9.
        """
        try:
            import sklearn
        except ImportError:
            return

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(1000, 100)

        learner = LibSVM()
        learner.setKernel("linear", 0)
        learner.setC(0.1)

        # One train/classify/evaluate round per error cost, in this order
        errors = []
        for errorCost in (0.1, 0.9):
            learner.setErrorCost(errorCost)
            learner.learnModel(X, y)
            predictions = learner.classify(X)
            errors.append(Evaluator.binaryErrorP(y, predictions))

        lowCostError, highCostError = errors
        self.assertTrue(lowCostError > highCostError)

    def testClassify(self):
        """
        Check that the classification error does not depend on example order.

        The model is trained once. The binary error on the fixture examples
        is compared with the error obtained when the same examples and
        labels are presented in a random permutation.
        """
        try:
            import sklearn
        except ImportError:
            return

        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        e = Evaluator.binaryError(self.y, predY)

        #Now, permute examples (labels are permuted the same way)
        perm = numpy.random.permutation(self.X.shape[0])
        predY = self.svm.classify(self.X[perm, :])
        e2 = Evaluator.binaryError(self.y[perm], predY)

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(e, e2)

    def testEvaluateCv(self):
        """
        Run 10-fold evaluateCv on the fixture data and assert that every
        entry of both returned arrays lies in the interval [0, 1].
        """
        try:
            import sklearn
        except ImportError:
            return

        means, variances = self.svm.evaluateCv(self.X, self.y, 10)

        # Upper bound first, then lower bound, for each returned array
        for statistic in (means, variances):
            self.assertTrue((statistic <= 1).all())
            self.assertTrue((statistic >= 0).all())

    @apgl.skip("")
    def testGetModel(self):
        """
        Train a LibSVM on 50 generated examples with 3 features and call
        getWeights. Nothing is asserted; the test only checks that the
        calls complete.
        """
        try:
            import sklearn
        except ImportError:
            return

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(50, 3)

        learner = LibSVM()
        learner.learnModel(X, y)

        # The returned pair is unpacked but not inspected
        weights, b = learner.getWeights()

    @apgl.skip("")
    def testGetWeights(self):
        """
        Rebuild decision values from getWeights and compare with predict.

        The comparison is made twice: on six fixed one-dimensional examples
        (exact equality) and on 50 random examples with 10 features
        (Euclidean distance below 1e-6). In both cases the sign of the
        rebuilt decision values must equal the predicted labels.
        """
        try:
            import sklearn
        except ImportError:
            return

        # Fixed data set: three negative and three positive examples
        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
        labels = numpy.array([[-1], [-1], [-1], [1], [1], [1]])

        svm = LibSVM()
        svm.learnModel(X, labels.ravel())
        weights, b = svm.getWeights()

        #Let's see if we can compute the decision values
        y, decisions = svm.predict(X, True)
        # NOTE(review): the bias is subtracted here but added in the random
        # case below - confirm which sign getWeights implies.
        rebuilt = numpy.dot(X, weights) - b

        self.assertTrue((decisions == rebuilt).all())
        self.assertTrue((y.ravel() == numpy.sign(rebuilt)).all())

        #Do the same test on a random dataset
        X = numpy.random.rand(50, 10)
        labels = numpy.sign(numpy.random.rand(50) - 0.5)

        svm = LibSVM()
        svm.learnModel(X, labels.ravel())
        weights, b = svm.getWeights()

        y, decisions = svm.predict(X, True)
        rebuilt = numpy.dot(X, weights) + b

        self.assertTrue(numpy.linalg.norm(decisions - rebuilt) < 10**-6)
        self.assertTrue((y.ravel() == numpy.sign(rebuilt)).all())

    def testSetTermination(self):
        """
        Train the fixture LibSVM, call setTermination(0.1) and train again.
        Nothing is asserted; the test only checks that both fits complete.
        """
        try:
            import sklearn
        except ImportError:
            return

        learner = self.svm
        X, y = self.X, self.y

        learner.learnModel(X, y)
        learner.setTermination(0.1)
        learner.learnModel(X, y)

    def testSetSvmType(self):
        """
        Switch a LibSVM between "Epsilon_SVR" and "C_SVC".

        For regression, a grid over C and epsilon (2**-6 ... 2**3 each) is
        searched and the best root mean squared error on the training data
        must beat that of the all-zero prediction. For classification, the
        binary error on the sign labels must lie in [0, 1].
        """
        try:
            import sklearn
        except ImportError:
            return

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        X = Standardiser().standardiseArray(X)
        c = numpy.random.randn(numFeatures)

        # Real-valued targets and the corresponding -1/+1 labels
        y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
        y2 = numpy.array(y > 0, numpy.int32) * 2 - 1

        svm = LibSVM()

        svm.setSvmType("Epsilon_SVR")

        self.assertEqual(svm.getType(), "Epsilon_SVR")

        #Try to get a good error
        # numpy.float64 replaces the numpy.float alias, which no longer
        # exists in current NumPy releases.
        Cs = 2**numpy.arange(-6, 4, dtype=numpy.float64)
        epsilons = 2**numpy.arange(-6, 4, dtype=numpy.float64)

        bestError = 10
        for C in Cs:
            for epsilon in epsilons:
                svm.setEpsilon(epsilon)
                svm.setC(C)
                svm.learnModel(X, y)
                yp = svm.predict(X)

                # Evaluate once per grid point and keep the smallest value
                bestError = min(bestError, Evaluator.rootMeanSqError(y, yp))

        self.assertTrue(
            bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))

        svm.setSvmType("C_SVC")
        svm.learnModel(X, y2)
        yp2 = svm.predict(X)

        self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)

    @apgl.skip("")
    def testSaveParams(self):
        """
        Check that parameters written by saveParams are restored by loadParams.

        A LibSVM is configured with non-default values and saved to a file
        below the output directory. A second, freshly constructed LibSVM then
        loads that file, and the assertions are made on this loaded instance
        so that the save/load round trip is what is actually verified.
        """
        try:
            import sklearn
        except ImportError:
            # sklearn cannot be imported: the test body is not run.
            return

        svm = LibSVM()
        svm.setC(10.5)
        svm.setEpsilon(12.1)
        svm.setErrorCost(1.8)
        svm.setSvmType("Epsilon_SVR")
        svm.setTermination(0.12)
        svm.setKernel("gaussian", 0.43)

        outputDir = PathDefaults.getOutputDir()
        fileName = outputDir + "test/testSvmParams"
        svm.saveParams(fileName)

        svm2 = LibSVM()
        svm2.loadParams(fileName)

        # Assert on svm2 (the loaded copy). Checking svm would only confirm
        # the setters above and would pass even if loadParams did nothing.
        self.assertEqual(svm2.getC(), 10.5)
        self.assertEqual(svm2.getEpsilon(), 12.1)
        self.assertEqual(svm2.getErrorCost(), 1.8)
        self.assertEqual(svm2.getSvmType(), "Epsilon_SVR")
        self.assertEqual(svm2.getTermination(), 0.12)
        self.assertEqual(svm2.getKernel(), "gaussian")
        self.assertEqual(svm2.getKernelParams(), 0.43)

    def testStr(self):
        """
        Construct a LibSVM. Nothing is asserted and the string form is not
        requested; the test only checks that construction succeeds.
        """
        try:
            import sklearn
        except ImportError:
            return

        learner = LibSVM()

    def testSetEpsilon(self):
        """
        Vary epsilon of the regression SVM and compare support vector counts.

        The model is fitted with epsilon 1.0, 0.5 and 0.01 on noisy linear
        data; the shape of the model's support_ array must grow strictly as
        epsilon decreases.
        """
        try:
            import sklearn
        except ImportError:
            return

        svm = LibSVM()
        svm.setC(10.0)
        svm.setEpsilon(0.1)
        svm.setSvmType("Epsilon_SVR")

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)

        # Linear signal plus standard normal noise
        signal = numpy.dot(X, numpy.array([c]).T).ravel()
        y = signal + numpy.random.randn(100)

        supportShapes = []
        for epsilon in (1.0, 0.5, 0.01):
            svm.setEpsilon(epsilon)
            svm.learnModel(X, y)
            supportShapes.append(svm.getModel().support_.shape)

        largeEps, mediumEps, smallEps = supportShapes

        #There should be fewer SVs as epsilon increases
        self.assertTrue(largeEps < mediumEps)
        self.assertTrue(mediumEps < smallEps)

    def testPredict(self):
        """
        Train a LibSVM on -1/+1 labels derived from a random linear function
        and call predict with the second argument set to True. Nothing is
        asserted; the test only checks that the calls complete.
        """
        try:
            import sklearn
        except ImportError:
            return

        X = numpy.random.randn(100, 10)
        c = numpy.random.randn(10)

        # Sign of the linear response, encoded as -1 / +1 integers
        response = numpy.dot(X, numpy.array([c]).T).ravel()
        labels = numpy.array(response > 0, numpy.int32) * 2 - 1

        learner = LibSVM()
        learner.learnModel(X, labels)
        predicted, decisionValues = learner.predict(X, True)

    #@unittest.skip("")
    def testParallelVfcvRbf(self):
        """
        Compare parallelVfcvRbf with a sequential grid search.

        For every (C, gamma) pair of the fixture grids the binary error is
        accumulated over 3 cross validation folds. The best pair and the
        grid of mean errors (transposed) must agree with what
        parallelVfcvRbf returns.
        """
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx)

        tol = 10**-6
        # Start from infinity so the first grid point always becomes the
        # incumbent. With a finite start, bestC/bestGamma stay unbound when
        # every summed fold error is at or above that value.
        bestError = float("inf")
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)

                meanErrors2[i, j] = error / len(idx)

                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error

        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    def testParallelVfcvRbf2(self):
        """
        Compare parallelVfcvRbf with a sequential grid search for regression.

        The svm type is "Epsilon_SVR" and the grid covers C, gamma and
        epsilon. Errors come from the metric returned by getMetricMethod and
        are stored indexed as [gamma, epsilon, C].
        """
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        bestSVM, meanErrors = svm.parallelVfcvRbf(self.X,
                                                  self.y,
                                                  idx,
                                                  type="Epsilon_SVR")

        tol = 10**-6
        # Start from infinity so the first grid point always becomes the
        # incumbent and the best parameters are always bound.
        bestError = float("inf")
        meanErrors2 = numpy.zeros(
            (svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                for k, epsilon in enumerate(svm.epsilons):
                    error = 0
                    for trainInds, testInds in idx:
                        trainX = self.X[trainInds, :]
                        trainY = self.y[trainInds]
                        testX = self.X[testInds, :]
                        testY = self.y[testInds]

                        svm.setGamma(gamma)
                        svm.setC(C)
                        svm.setEpsilon(epsilon)
                        svm.learnModel(trainX, trainY)
                        predY = svm.predict(testX)
                        error += svm.getMetricMethod()(predY, testY)

                    meanErrors2[j, k, i] = error / len(idx)

                    if error < bestError:
                        bestC = C
                        bestGamma = gamma
                        bestError = error
                        bestEpsilon = epsilon

        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertEqual(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)

    def testParallelVfPenRbf(self):
        """
        Compare parallelVfPenRbf with a sequential penalised grid search.

        For every (C, gamma) pair the penalty is the mean over folds of
        (error on all examples - error on the training fold), scaled by
        Cv[0]. It is added to the error of a model trained on all examples.
        The best pair and the penalised error grid (transposed) must agree
        with the first entry returned by parallelVfPenRbf.
        """
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv)

        tol = 10**-6
        # Start from infinity so the first grid point always becomes the
        # incumbent. A penalised error can exceed 1, which would otherwise
        # leave bestC/bestGamma unbound.
        bestError = float("inf")
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty += Evaluator.binaryError(
                        predY, self.y) - Evaluator.binaryError(
                            predTrainY, trainY)

                penalty = penalty * Cv[0] / len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                meanErrors2[i,
                            j] = Evaluator.binaryError(predY, self.y) + penalty

                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]

        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    #@unittest.skip("")
    def testParallelVfPenRbf2(self):
        """
        Compare parallelVfPenRbf with a sequential search for regression.

        Same procedure as the classification variant, with svm type
        "Epsilon_SVR", an additional epsilon grid, the metric returned by
        getMetricMethod, and results indexed as [gamma, epsilon, C].
        """
        #Test support vector regression
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        resultsList = svm.parallelVfPenRbf(self.X,
                                           self.y,
                                           idx,
                                           Cv,
                                           type="Epsilon_SVR")

        tol = 10**-6
        # Start from infinity so the first grid point always becomes the
        # incumbent and the best parameters are always bound.
        bestError = float("inf")
        meanErrors2 = numpy.zeros(
            (svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                for k, epsilon in enumerate(svm.epsilons):
                    penalty = 0
                    for trainInds, testInds in idx:
                        trainX = self.X[trainInds, :]
                        trainY = self.y[trainInds]

                        svm.setGamma(gamma)
                        svm.setC(C)
                        svm.setEpsilon(epsilon)
                        svm.learnModel(trainX, trainY)
                        predY = svm.predict(self.X)
                        predTrainY = svm.predict(trainX)
                        penalty += svm.getMetricMethod()(
                            predY, self.y) - svm.getMetricMethod()(predTrainY,
                                                                   trainY)

                    penalty = penalty * Cv[0] / len(idx)
                    svm.learnModel(self.X, self.y)
                    predY = svm.predict(self.X)
                    meanErrors2[j, k, i] = svm.getMetricMethod()(
                        predY, self.y) + penalty

                    if meanErrors2[j, k, i] < bestError:
                        bestC = C
                        bestGamma = gamma
                        bestEpsilon = epsilon
                        bestError = meanErrors2[j, k, i]

        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertEqual(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)

    def testGetC(self):
        """Check that getC returns the value previously passed to setC."""
        learner = LibSVM()
        learner.setC(10.0)
        self.assertTrue(learner.getC() == 10.0)

    def testGetGamma(self):
        """
        Check that getKernelParams returns the kernel parameter given to
        setKernel for the "gaussian" kernel.
        """
        learner = LibSVM()
        learner.setKernel("gaussian", 12.0)
        self.assertTrue(learner.getKernelParams() == 12.0)

    def testComputeTestError(self):
        """
        Check computeTestError against an explicit train/predict/evaluate.

        The fixture data is split in half. computeTestError is called with
        (trainX, trainY, testX, testY, svm) and its result must equal the
        binary error of an identically configured LibSVM trained on the
        first half and evaluated on the second.
        """
        C = 10.0
        gamma = 0.5

        # Integer division: a float cannot be used as a slice bound
        numTrainExamples = self.X.shape[0] // 2

        trainX, trainY = self.X[
            0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

        svm = LibSVM('gaussian', gamma, C)
        args = (trainX, trainY, testX, testY, svm)
        error = computeTestError(args)

        svm = LibSVM('gaussian', gamma, C)
        svm.learnModel(trainX, trainY)
        predY = svm.predict(testX)
        self.assertEqual(Evaluator.binaryError(predY, testY), error)

    def testComputeBootstrapError(self):
        """
        Call computeBootstrapError on a half/half split of the fixture data
        with a gaussian LibSVM. Nothing is asserted on the result; the test
        only checks that the call completes.
        """
        C = 10.0
        gamma = 0.5

        # Integer division: a float cannot be used as a slice bound
        numTrainExamples = self.X.shape[0] // 2

        trainX, trainY = self.X[
            0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

        svm = LibSVM('gaussian', gamma, C)

        args = (trainX, trainY, testX, testY, svm)
        error = computeBootstrapError(args)

    def testComputeIdealPenalty(self):
        """
        Call computeIdealPenalty with the fixture data used both as training
        and as full data set. Nothing is asserted on the result.
        """
        learner = LibSVM("gaussian", 0.5, 10.0)
        penaltyArgs = (self.X, self.y, self.X, self.y, learner)
        penalty = computeIdealPenalty(penaltyArgs)

    def testParallelPenaltyGridRbf(self):
        """
        Compare parallelPenaltyGridRbf with penalties computed one by one.

        The model is trained on the first 40 fixture examples for every
        (C, gamma) pair; the penalty is the binary error on all examples
        minus the binary error on those 40. The transposed grid must match
        the result of parallelPenaltyGridRbf. computeIdealPenalty is also
        called for each pair, but its values are not asserted.
        """
        learner = self.svm
        learner.setKernel("gaussian")
        subsetX, subsetY = self.X[0:40, :], self.y[0:40]

        gridPenalties = learner.parallelPenaltyGridRbf(subsetX, subsetY,
                                                       self.X, self.y)
        expected = numpy.zeros(
            (learner.Cs.shape[0], learner.gammas.shape[0]))
        fromHelper = numpy.zeros(
            (learner.Cs.shape[0], learner.gammas.shape[0]))

        for row, C in enumerate(learner.Cs):
            for col, gamma in enumerate(learner.gammas):
                learner.setGamma(gamma)
                learner.setC(C)
                learner.learnModel(subsetX, subsetY)

                predAll = learner.predict(self.X)
                predSubset = learner.predict(subsetX)
                errorAll = Evaluator.binaryError(predAll, self.y)
                errorSubset = Evaluator.binaryError(predSubset, subsetY)
                expected[row, col] = errorAll - errorSubset

                fromHelper[row, col] = computeIdealPenalty(
                    (subsetX, subsetY, self.X, self.y, learner))

        self.assertTrue(
            numpy.linalg.norm(expected.T - gridPenalties) < 10**-6)

    def testParallelPenaltyGridRbf2(self):
        """
        Compare parallelPenaltyGridRbf with one-by-one penalties (regression).

        Same procedure as the classification variant, with svm type
        "Epsilon_SVR", an additional epsilon grid, the metric returned by
        getMetricMethod, and results indexed as [gamma, epsilon, C].
        """
        learner = self.svm
        learner.setKernel("gaussian")
        learner.setSvmType("Epsilon_SVR")
        subsetX, subsetY = self.X[0:40, :], self.y[0:40]

        gridPenalties = learner.parallelPenaltyGridRbf(subsetX,
                                                       subsetY,
                                                       self.X,
                                                       self.y,
                                                       type="Epsilon_SVR")
        expected = numpy.zeros((learner.gammas.shape[0],
                                learner.epsilons.shape[0],
                                learner.Cs.shape[0]))

        for cPos, C in enumerate(learner.Cs):
            for gammaPos, gamma in enumerate(learner.gammas):
                for epsPos, epsilon in enumerate(learner.epsilons):
                    learner.setGamma(gamma)
                    learner.setC(C)
                    learner.setEpsilon(epsilon)

                    learner.learnModel(subsetX, subsetY)
                    predAll = learner.predict(self.X)
                    predSubset = learner.predict(subsetX)

                    # Error on all examples minus error on the subset
                    errorAll = learner.getMetricMethod()(predAll, self.y)
                    errorSubset = learner.getMetricMethod()(predSubset,
                                                            subsetY)
                    expected[gammaPos, epsPos, cPos] = errorAll - errorSubset

        self.assertTrue(
            numpy.linalg.norm(expected - gridPenalties) < 10**-6)

    def testParallelModelSelect(self):
        """
        Compare parallelModelSelect with a sequential grid search.

        The parameter dictionary maps "setC" and "setGamma" to the fixture
        grids. For every (C, gamma) pair the binary error is accumulated
        over 3 cross validation folds. The best pair and the grid of mean
        errors (transposed) must agree with what parallelModelSelect
        returns.
        """
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")

        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        bestSVM, meanErrors = svm.parallelModelSelect(self.X, self.y, idx,
                                                      paramDict)

        tol = 10**-6
        # Start from infinity so the first grid point always becomes the
        # incumbent. With a finite start, bestC/bestGamma stay unbound when
        # every summed fold error is at or above that value.
        bestError = float("inf")
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        print("Computing real grid")

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)

                meanErrors2[i, j] = error / len(idx)

                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error

        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    def testParallelPenaltyGrid2(self):
        """
        Compare parallelPenaltyGrid with one-by-one penalties (regression).

        The parameter dictionary maps "setC", "setGamma" and "setEpsilon" to
        the fixture grids. The model is trained on the first 40 examples for
        every combination; the penalty is the metric on all examples minus
        the metric on those 40, indexed as [gamma, epsilon, C].
        """
        learner = self.svm
        learner.setKernel("gaussian")
        learner.setSvmType("Epsilon_SVR")
        subsetX, subsetY = self.X[0:40, :], self.y[0:40]

        # Keys are inserted in this order: setC, setGamma, setEpsilon
        paramDict = {}
        paramDict["setC"] = learner.getCs()
        paramDict["setGamma"] = learner.getGammas()
        paramDict["setEpsilon"] = learner.getEpsilons()

        gridPenalties = learner.parallelPenaltyGrid(subsetX, subsetY, self.X,
                                                    self.y, paramDict)
        expected = numpy.zeros((learner.gammas.shape[0],
                                learner.epsilons.shape[0],
                                learner.Cs.shape[0]))

        for cPos, C in enumerate(learner.Cs):
            for gammaPos, gamma in enumerate(learner.gammas):
                for epsPos, epsilon in enumerate(learner.epsilons):
                    learner.setGamma(gamma)
                    learner.setC(C)
                    learner.setEpsilon(epsilon)

                    learner.learnModel(subsetX, subsetY)
                    predAll = learner.predict(self.X)
                    predSubset = learner.predict(subsetX)

                    errorAll = learner.getMetricMethod()(predAll, self.y)
                    errorSubset = learner.getMetricMethod()(predSubset,
                                                            subsetY)
                    expected[gammaPos, epsPos, cPos] = errorAll - errorSubset

        self.assertTrue(
            numpy.linalg.norm(expected - gridPenalties) < 10**-6)

    def testParallelPen(self):
        """
        Compare parallelPen with a sequential penalised grid search.

        For every (C, gamma) pair the penalty is the mean over folds of
        (error on all examples - error on the training fold), scaled by
        Cv[0]. The training error comes from a model fitted on all examples.
        The best pair, the training errors, the penalties and their sum
        (all transposed) must agree with the first entry returned by
        parallelPen.
        """
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")

        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        resultsList = svm.parallelPen(self.X, self.y, idx, paramDict, Cv)

        tol = 10**-6
        # Start from infinity so the first grid point always becomes the
        # incumbent. A penalised error can exceed 1, which would otherwise
        # leave bestC/bestGamma unbound.
        bestError = float("inf")
        trainErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        penalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty += Evaluator.binaryError(
                        predY, self.y) - Evaluator.binaryError(
                            predTrainY, trainY)

                penalty = penalty * Cv[0] / len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                trainErrors2[i, j] = Evaluator.binaryError(predY, self.y)
                penalties2[i, j] = penalty
                meanErrors2[i,
                            j] = Evaluator.binaryError(predY, self.y) + penalty

                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]

        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
        self.assertTrue(numpy.linalg.norm(trainErrors2.T - trainErrors) < tol)
        self.assertTrue(
            numpy.linalg.norm(penalties2.T - currentPenalties) < tol)

    def testParallelPenaltyGrid(self):
        """
        Compare parallelPenaltyGrid with penalties computed one by one.

        The parameter dictionary maps "setC" and "setGamma" to the fixture
        grids. The model is trained on the first 40 examples for every pair;
        the penalty is the binary error on all examples minus the binary
        error on those 40. The transposed grid must match the result of
        parallelPenaltyGrid. computeIdealPenalty is also called for each
        pair, but its values are not asserted.
        """
        learner = self.svm
        learner.setKernel("gaussian")
        subsetX, subsetY = self.X[0:40, :], self.y[0:40]

        paramDict = {}
        paramDict["setC"] = learner.getCs()
        paramDict["setGamma"] = learner.getGammas()

        gridPenalties = learner.parallelPenaltyGrid(subsetX, subsetY, self.X,
                                                    self.y, paramDict)
        expected = numpy.zeros(
            (learner.Cs.shape[0], learner.gammas.shape[0]))
        fromHelper = numpy.zeros(
            (learner.Cs.shape[0], learner.gammas.shape[0]))

        for row, C in enumerate(learner.Cs):
            for col, gamma in enumerate(learner.gammas):
                learner.setGamma(gamma)
                learner.setC(C)
                learner.learnModel(subsetX, subsetY)

                predAll = learner.predict(self.X)
                predSubset = learner.predict(subsetX)
                errorAll = Evaluator.binaryError(predAll, self.y)
                errorSubset = Evaluator.binaryError(predSubset, subsetY)
                expected[row, col] = errorAll - errorSubset

                fromHelper[row, col] = computeIdealPenalty(
                    (subsetX, subsetY, self.X, self.y, learner))

        self.assertTrue(
            numpy.linalg.norm(expected.T - gridPenalties) < 10**-6)

    def testGetBestLearner(self):
        """
        Check the C chosen by getBestLearner when normModelSelect is True.

        A random error grid over (C, gamma) is passed to getBestLearner. The
        test then recomputes the expected C: it takes the grid minimum,
        averages weightNorm over the cross validation training folds for
        those parameters, and picks the C whose weightNorm on all examples
        is closest to that average.
        """
        svm = self.svm
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        errors = numpy.random.rand(svm.getCs().shape[0],
                                   svm.getGammas().shape[0])

        folds = 5
        idx = Sampling.crossValidation(folds, self.X.shape[0])

        svm.normModelSelect = True
        svm.setKernel("gaussian")
        learner = svm.getBestLearner(errors, paramDict, self.X, self.y, idx)

        # Record the chosen C before the learner is reconfigured below
        bestC = learner.getC()

        #Find the best norm
        bestInds = numpy.unravel_index(numpy.argmin(errors), errors.shape)
        learner.setC(svm.getCs()[bestInds[0]])
        learner.setGamma(svm.getGammas()[bestInds[1]])

        norms = []
        for trainInds, testInds in idx:
            validX = self.X[trainInds, :]
            validY = self.y[trainInds]
            learner.learnModel(validX, validY)

            norms.append(learner.weightNorm())

        bestNorm = numpy.array(norms).mean()

        # Weight norm on all examples for every candidate C
        norms = numpy.zeros(paramDict["setC"].shape[0])
        for i, C in enumerate(paramDict["setC"]):
            learner.setC(C)
            learner.learnModel(self.X, self.y)
            norms[i] = learner.weightNorm()

        bestC2 = paramDict["setC"][numpy.abs(norms - bestNorm).argmin()]

        self.assertEqual(bestC, bestC2)
Exemplo n.º 4
0
class LibSVMTest(unittest.TestCase):
    def setUp(self):
        """
        Build a seeded binary dataset and a LibSVM with small parameter grids.

        If sklearn cannot be imported the error is logged at debug level and
        no fixtures are created.
        """
        try:
            import sklearn
        except ImportError as error:
            logging.debug(error)
            return

        numpy.random.seed(21)
        numExamples = 100
        numFeatures = 10
        eg = ExamplesGenerator()

        self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures)
        self.svm = LibSVM()
        # numpy.float64 replaces the numpy.float alias, which NumPy removed;
        # both denote the same double-precision dtype
        self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=numpy.float64)
        self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=numpy.float64)
        self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=numpy.float64)

        numpy.set_printoptions(linewidth=150, suppress=True, precision=3)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

    def testLearnModel(self):
        """
        Train the fixture classifier, check that labels containing the value
        5 make learnModel raise ValueError, then train an Epsilon_SVR model
        on real-valued targets.

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        # Only checks that the error can be computed; its value is not asserted
        Evaluator.binaryError(self.y, predY)

        #Test for wrong labels
        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
        y = numpy.array([[-1], [-1], [-1], [1], [1], [5]])

        self.assertRaises(ValueError, self.svm.learnModel, X, y)

        #Try the regression SVM
        svm = LibSVM(type="Epsilon_SVR")
        y = numpy.random.rand(self.X.shape[0])
        # Fit on the real-valued targets generated above, not the binary labels
        svm.learnModel(self.X, y)
        

    def testSetErrorCost(self):
        """
        Train a linear SVM with error cost 0.1 and then 0.9 on the same data
        and check that binaryErrorP is strictly larger for the first setting.

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(1000, 100)

        learner = LibSVM()
        learner.setKernel("linear", 0)
        learner.setC(0.1)

        # Same learner retrained once per error cost, in this order
        errorsP = []
        for errorCost in (0.1, 0.9):
            learner.setErrorCost(errorCost)
            learner.learnModel(X, y)
            predictions = learner.classify(X)
            errorsP.append(Evaluator.binaryErrorP(y, predictions))

        lowCostError, highCostError = errorsP
        self.assertTrue(lowCostError > highCostError)

    def testClassify(self):
        """
        Check that the binary error of classify is unchanged when the
        examples and their labels are permuted together.

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        y = self.y

        e = Evaluator.binaryError(y, predY)

        #Now, permute examples
        perm = numpy.random.permutation(self.X.shape[0])
        predY = self.svm.classify(self.X[perm, :])
        y = y[perm]

        e2 = Evaluator.binaryError(y, predY)

        # assertEquals is a deprecated alias that newer unittest versions drop
        self.assertEqual(e, e2)

    def testEvaluateCv(self):
        """
        Call evaluateCv with folds=10 on the fixture data and check that
        every returned mean and variance lies in [0, 1].

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        numFolds = 10
        means, variances = self.svm.evaluateCv(self.X, self.y, numFolds)

        # Upper bound then lower bound, first for the means, then the variances
        for stats in (means, variances):
            self.assertTrue((stats <= 1).all())
            self.assertTrue((stats >= 0).all())

    @apgl.skip("")
    def testGetModel(self):
        """
        Train a default LibSVM on a generated binary dataset (50 examples,
        3 features) and call getWeights; nothing is asserted on the result.

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(50, 3)

        learner = LibSVM()
        learner.learnModel(X, y)

        weights, b = learner.getWeights()

        #logging.debug(weights)
        #logging.debug(b)
        

    @apgl.skip("")
    def testGetWeights(self):
        """
        Rebuild the decision values from getWeights and compare them with the
        ones returned by predict(X, True), first on six 1-D points and then
        on a random 50 x 10 dataset.

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
        #X = numpy.random.rand(numExamples, 10)
        labels = numpy.array([[-1], [-1], [-1], [1], [1], [1]])

        learner = LibSVM()
        learner.learnModel(X, labels.ravel())
        weights, b = learner.getWeights()

        #Let's see if we can compute the decision values
        predicted, decisions = learner.predict(X, True)
        # NOTE(review): the bias is subtracted here but added in the random
        # case below; confirm which sign getWeights() implies
        manualDecisions = numpy.dot(X, weights) - b

        self.assertTrue((decisions == manualDecisions).all())
        self.assertTrue((predicted.ravel() == numpy.sign(manualDecisions)).all())

        #Do the same test on a random datasets
        numExamples = 50
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        labels = numpy.sign(numpy.random.rand(numExamples)-0.5)

        learner = LibSVM()
        learner.learnModel(X, labels.ravel())
        weights, b = learner.getWeights()

        #Let's see if we can compute the decision values
        predicted, decisions = learner.predict(X, True)
        manualDecisions = numpy.dot(X, weights) + b

        tol = 10**-6

        self.assertTrue(numpy.linalg.norm(decisions - manualDecisions) < tol)
        self.assertTrue((predicted.ravel() == numpy.sign(manualDecisions)).all())

    def testSetTermination(self):
        """
        Train the fixture SVM, set the termination value to 0.1 and train
        again; passes as long as neither call raises.

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        learner = self.svm
        learner.learnModel(self.X, self.y)
        learner.setTermination(0.1)
        learner.learnModel(self.X, self.y)

    def testSetSvmType(self):
        """
        Switch a LibSVM between Epsilon_SVR and C_SVC.

        For regression, the best training RMSE over a (C, epsilon) grid must
        beat the RMSE of predicting zero everywhere. For classification, the
        binary error on thresholded targets must lie in [0, 1].

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        X = Standardiser().standardiseArray(X)
        c = numpy.random.randn(numFeatures)

        # Real-valued targets, and +/-1 labels derived from their sign
        y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
        y2 = numpy.array(y > 0, numpy.int32)*2 - 1

        svm = LibSVM()

        svm.setSvmType("Epsilon_SVR")

        # assertEquals is a deprecated alias that newer unittest versions drop
        self.assertEqual(svm.getType(), "Epsilon_SVR")

        #Try to get a good error
        # numpy.float64 replaces the numpy.float alias, which NumPy removed
        Cs = 2**numpy.arange(-6, 4, dtype=numpy.float64)
        epsilons = 2**numpy.arange(-6, 4, dtype=numpy.float64)

        bestError = 10
        for C in Cs:
            for epsilon in epsilons:
                svm.setEpsilon(epsilon)
                svm.setC(C)
                svm.learnModel(X, y)
                yp = svm.predict(X)

                # Evaluate the error once per grid point
                rmse = Evaluator.rootMeanSqError(y, yp)
                if rmse < bestError:
                    bestError = rmse

        self.assertTrue(bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))

        svm.setSvmType("C_SVC")
        svm.learnModel(X, y2)
        yp2 = svm.predict(X)

        self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)

    @apgl.skip("")
    def testSaveParams(self):
        """
        Save the parameters of a configured LibSVM, load them into a fresh
        instance and check that the loaded instance reports the same values.

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        svm = LibSVM()
        svm.setC(10.5)
        svm.setEpsilon(12.1)
        svm.setErrorCost(1.8)
        svm.setSvmType("Epsilon_SVR")
        svm.setTermination(0.12)
        svm.setKernel("gaussian", 0.43)

        outputDir = PathDefaults.getOutputDir()
        fileName = outputDir + "test/testSvmParams"
        svm.saveParams(fileName)

        svm2 = LibSVM()
        svm2.loadParams(fileName)

        # Assert on the loaded instance: checking the original object would
        # pass even if saveParams/loadParams did nothing
        self.assertEqual(svm2.getC(), 10.5)
        self.assertEqual(svm2.getEpsilon(), 12.1)
        self.assertEqual(svm2.getErrorCost(), 1.8)
        self.assertEqual(svm2.getSvmType(), "Epsilon_SVR")
        self.assertEqual(svm2.getTermination(), 0.12)
        self.assertEqual(svm2.getKernel(), "gaussian")
        self.assertEqual(svm2.getKernelParams(), 0.43)

    def testStr(self):
        """
        Construct a default LibSVM; the string conversion itself is commented
        out, so only construction is exercised.

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        learner = LibSVM()

        #logging.debug(learner)

    def testSetEpsilon(self):
        """
        Vary epsilon for the regression SVM (1.0, 0.5, then 0.01) and check
        that the shape of the model's support_ array grows as epsilon
        shrinks.

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        learner = LibSVM()
        learner.setC(10.0)
        learner.setEpsilon(0.1)
        learner.setSvmType("Epsilon_SVR")

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)

        # Noisy linear targets
        y = numpy.dot(X, numpy.array([c]).T).ravel() + numpy.random.randn(100)

        # Retrain once per epsilon, from largest to smallest
        supportShapes = []
        for epsilon in (1.0, 0.5, 0.01):
            learner.setEpsilon(epsilon)
            learner.learnModel(X, y)
            supportShapes.append(learner.getModel().support_.shape)

        largeEpsShape, midEpsShape, smallEpsShape = supportShapes

        #There should be fewer SVs as epsilon increases
        self.assertTrue(largeEpsShape < midEpsShape)
        self.assertTrue(midEpsShape < smallEpsShape)

    def testPredict(self):
        """
        Train on +/-1 labels derived from a random linear function and call
        predict(X, True); nothing is asserted about the returned pair.

        Returns without testing anything when sklearn cannot be imported.
        """
        try:
            import sklearn
        except ImportError:
            return

        numExamples, numFeatures = 100, 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)

        # Map the sign of X.c onto the labels {-1, +1}
        scores = numpy.dot(X, numpy.array([c]).T).ravel()
        y = numpy.array(scores > 0, numpy.int32)*2 - 1

        learner = LibSVM()
        learner.learnModel(X, y)
        y2, d = learner.predict(X, True)

        #self.assertTrue((numpy.sign(d) == y2).all())

    #@unittest.skip("")
    def testParallelVfcvRbf(self):
        """
        Compare parallelVfcvRbf with a direct 3-fold grid search over
        (C, gamma) for binary classification: the best C and gamma must
        match, and so must the grid of mean fold errors (after transposing).
        """
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx)

        tol = 10**-6
        # "error" below is summed over folds and can exceed 1; starting from
        # infinity guarantees bestC/bestGamma are always assigned
        bestError = float("inf")
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                # Parameters are fixed for all folds of this grid point
                svm.setGamma(gamma)
                svm.setC(C)

                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]

                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)

                meanErrors2[i, j] = error/len(idx)

                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error

        # assertEquals is a deprecated alias that newer unittest versions drop
        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    def testParallelVfcvRbf2(self):
        """
        Regression variant of the parallelVfcvRbf check: compare against a
        direct 3-fold grid search over (C, gamma, epsilon) for Epsilon_SVR.

        The reference grid is indexed [gamma, epsilon, C] and compared with
        the returned error grid without transposing.
        """
        #In this test we try SVM regression
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx, type="Epsilon_SVR")

        tol = 10**-6
        # Infinity instead of a finite guess, so the first grid point always
        # initialises the best parameters
        bestError = float("inf")
        meanErrors2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                for k, epsilon in enumerate(svm.epsilons):
                    # Parameters are fixed for all folds of this grid point
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)

                    error = 0
                    for trainInds, testInds in idx:
                        trainX = self.X[trainInds, :]
                        trainY = self.y[trainInds]
                        testX = self.X[testInds, :]
                        testY = self.y[testInds]

                        svm.learnModel(trainX, trainY)
                        predY = svm.predict(testX)
                        error += svm.getMetricMethod()(predY, testY)

                    meanErrors2[j, k, i] = error/len(idx)

                    if error < bestError:
                        bestC = C
                        bestGamma = gamma
                        bestError = error
                        bestEpsilon = epsilon

        # assertEquals is a deprecated alias that newer unittest versions drop
        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertEqual(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)

    def testParallelVfPenRbf(self):
        """
        Compare parallelVfPenRbf with a direct computation of the penalised
        error over the (C, gamma) grid for binary classification.

        For each grid point the penalty is the mean over folds of
        (error on all data - error on the fold's training part), scaled by
        Cv[0]; it is added to the error of a model trained on all data.
        """
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv)

        tol = 10**-6
        # The penalised error is not bounded by 1, so start from infinity to
        # guarantee bestC/bestGamma are always assigned
        bestError = float("inf")
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                # Parameters are fixed for every fit at this grid point
                svm.setGamma(gamma)
                svm.setC(C)

                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]

                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty += Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

                penalty = penalty*Cv[0]/len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                meanErrors2[i, j] = Evaluator.binaryError(predY, self.y) + penalty

                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]

        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        # assertEquals is a deprecated alias that newer unittest versions drop
        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    #@unittest.skip("")
    def testParallelVfPenRbf2(self):
        """
        Regression variant of the parallelVfPenRbf check: compare against a
        direct computation of the penalised error over the
        (C, gamma, epsilon) grid for Epsilon_SVR.

        The reference grid is indexed [gamma, epsilon, C] and errors are
        measured with the function returned by getMetricMethod().
        """
        #Test support vector regression
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv, type="Epsilon_SVR")

        tol = 10**-6
        # Infinity instead of a finite guess, so the first grid point always
        # initialises the best parameters
        bestError = float("inf")
        meanErrors2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                for k, epsilon in enumerate(svm.epsilons):
                    # Parameters are fixed for every fit at this grid point
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)

                    penalty = 0
                    for trainInds, testInds in idx:
                        trainX = self.X[trainInds, :]
                        trainY = self.y[trainInds]

                        svm.learnModel(trainX, trainY)
                        predY = svm.predict(self.X)
                        predTrainY = svm.predict(trainX)
                        penalty += svm.getMetricMethod()(predY, self.y) - svm.getMetricMethod()(predTrainY, trainY)

                    penalty = penalty*Cv[0]/len(idx)
                    svm.learnModel(self.X, self.y)
                    predY = svm.predict(self.X)
                    meanErrors2[j, k, i] = svm.getMetricMethod()(predY, self.y) + penalty

                    if meanErrors2[j, k, i] < bestError:
                        bestC = C
                        bestGamma = gamma
                        bestEpsilon = epsilon
                        bestError = meanErrors2[j, k, i]

        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        # assertEquals is a deprecated alias that newer unittest versions drop
        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertEqual(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)


    def testGetC(self):
        """getC should return the value previously passed to setC."""
        learner = LibSVM()
        learner.setC(10.0)
        self.assertTrue(learner.getC() == 10.0)

    def testGetGamma(self):
        """getKernelParams should return the parameter given to setKernel."""
        learner = LibSVM()
        learner.setKernel("gaussian", 12.0)
        self.assertTrue(learner.getKernelParams() == 12.0)

    def testComputeTestError(self):
        """
        Check that computeTestError equals the binary error obtained by
        training an identically configured LibSVM on the first half of the
        fixture data and predicting the second half.
        """
        C = 10.0
        gamma = 0.5

        # Integer division: a float such as shape[0]*0.5 is not a valid
        # slice index
        numTrainExamples = self.X.shape[0] // 2

        trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

        svm = LibSVM('gaussian', gamma, C)
        args = (trainX, trainY, testX, testY, svm)
        error = computeTestError(args)

        # Reference value from a fresh learner with the same parameters
        svm = LibSVM('gaussian', gamma, C)
        svm.learnModel(trainX, trainY)
        predY = svm.predict(testX)
        # assertEquals is a deprecated alias that newer unittest versions drop
        self.assertEqual(Evaluator.binaryError(predY, testY), error)

    def testComputeBootstrapError(self):
        """
        Smoke test: call computeBootstrapError on a half/half split of the
        fixture data; the returned value is not asserted.
        """
        C = 10.0
        gamma = 0.5

        # Integer division: a float such as shape[0]*0.5 is not a valid
        # slice index
        numTrainExamples = self.X.shape[0] // 2

        trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

        svm = LibSVM('gaussian', gamma, C)

        args = (trainX, trainY, testX, testY, svm)
        computeBootstrapError(args)



    def testComputeIdealPenalty(self):
        """
        Smoke test: call computeIdealPenalty with the fixture data passed as
        both data sets in the argument tuple; the result is not asserted.
        """
        learner = LibSVM("gaussian", 0.5, 10.0)
        error = computeIdealPenalty((self.X, self.y, self.X, self.y, learner))

    def testParallelPenaltyGridRbf(self):
        """
        Compare parallelPenaltyGridRbf with penalties computed directly over
        the (C, gamma) grid: the error on all fixture data minus the error on
        the 40 training examples.

        computeIdealPenalty is also evaluated at each grid point, but its
        values are not asserted.
        """
        svm = self.svm
        svm.setKernel("gaussian")
        trainX, trainY = self.X[0:40, :], self.y[0:40]

        idealPenalties = svm.parallelPenaltyGridRbf(trainX, trainY, self.X, self.y)

        gridShape = (svm.Cs.shape[0], svm.gammas.shape[0])
        idealPenalties2 = numpy.zeros(gridShape)
        idealPenalties3 = numpy.zeros(gridShape)

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)

                fullError = Evaluator.binaryError(svm.predict(self.X), self.y)
                trainError = Evaluator.binaryError(svm.predict(trainX), trainY)
                idealPenalties2[i, j] = fullError - trainError

                idealPenalties3[i, j] = computeIdealPenalty((trainX, trainY, self.X, self.y, svm))

        tol = 10**-6
        difference = numpy.linalg.norm(idealPenalties2.T - idealPenalties)
        self.assertTrue(difference < tol)


    def testParallelPenaltyGridRbf2(self):
        """
        Regression variant: compare parallelPenaltyGridRbf for Epsilon_SVR
        with penalties computed directly over the (C, gamma, epsilon) grid.

        The reference grid is indexed [gamma, epsilon, C] and errors are
        measured with the function returned by getMetricMethod().
        """
        #Test with SVM regression
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        trainX, trainY = self.X[0:40, :], self.y[0:40]

        idealPenalties = svm.parallelPenaltyGridRbf(trainX, trainY, self.X, self.y, type="Epsilon_SVR")
        idealPenalties2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                for k, epsilon in enumerate(svm.epsilons):
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)

                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)

                    # Error on all data minus error on the training subset
                    fullError = svm.getMetricMethod()(predY, self.y)
                    trainError = svm.getMetricMethod()(predTrainY, trainY)
                    idealPenalties2[j, k, i] = fullError - trainError

        tol = 10**-6
        difference = numpy.linalg.norm(idealPenalties2 - idealPenalties)
        self.assertTrue(difference < tol)


    def testParallelModelSelect(self):
        """
        Compare parallelModelSelect, driven by a parameter dictionary for
        setC and setGamma, with a direct 3-fold grid search: the best C and
        gamma must match, and so must the grid of mean fold errors (after
        transposing).
        """
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")

        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        bestSVM, meanErrors = svm.parallelModelSelect(self.X, self.y, idx, paramDict)

        tol = 10**-6
        # "error" below is summed over folds and can exceed 1; starting from
        # infinity guarantees bestC/bestGamma are always assigned
        bestError = float("inf")
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        print("Computing real grid")

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                # Parameters are fixed for all folds of this grid point
                svm.setGamma(gamma)
                svm.setC(C)

                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]

                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)

                meanErrors2[i, j] = error/len(idx)

                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error

        # assertEquals is a deprecated alias that newer unittest versions drop
        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)


    def testParallelPenaltyGrid2(self):
        """
        Regression variant: compare parallelPenaltyGrid, driven by a
        parameter dictionary for setC, setGamma and setEpsilon, with
        penalties computed directly over the grid.

        The reference grid is indexed [gamma, epsilon, C] and errors are
        measured with the function returned by getMetricMethod().
        """
        #Test with SVM regression
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        trainX, trainY = self.X[0:40, :], self.y[0:40]

        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        paramDict["setEpsilon"] = svm.getEpsilons()

        idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)
        idealPenalties2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                for k, epsilon in enumerate(svm.epsilons):
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)

                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)

                    # Error on all data minus error on the training subset
                    fullError = svm.getMetricMethod()(predY, self.y)
                    trainError = svm.getMetricMethod()(predTrainY, trainY)
                    idealPenalties2[j, k, i] = fullError - trainError

        tol = 10**-6
        difference = numpy.linalg.norm(idealPenalties2 - idealPenalties)
        self.assertTrue(difference < tol)

    def testParallelPen(self):
        """
        Compare parallelPen, driven by a parameter dictionary for setC and
        setGamma, with directly computed training errors, penalties and
        their sum over the (C, gamma) grid.

        For each grid point the penalty is the mean over folds of
        (error on all data - error on the fold's training part), scaled by
        Cv[0].
        """
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")

        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        resultsList = svm.parallelPen(self.X, self.y, idx, paramDict, Cv)

        tol = 10**-6
        # The penalised error is not bounded by 1, so start from infinity to
        # guarantee bestC/bestGamma are always assigned
        bestError = float("inf")
        trainErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        penalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                # Parameters are fixed for every fit at this grid point
                svm.setGamma(gamma)
                svm.setC(C)

                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]

                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    penalty += Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

                penalty = penalty*Cv[0]/len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                # Evaluate the full-data training error once and reuse it
                trainError = Evaluator.binaryError(predY, self.y)
                trainErrors2[i, j] = trainError
                penalties2[i, j] = penalty
                meanErrors2[i, j] = trainError + penalty

                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]

        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        # assertEquals is a deprecated alias that newer unittest versions drop
        self.assertEqual(bestC, bestSVM.getC())
        self.assertEqual(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
        self.assertTrue(numpy.linalg.norm(trainErrors2.T - trainErrors) < tol)
        self.assertTrue(numpy.linalg.norm(penalties2.T - currentPenalties) < tol)

    def testParallelPenaltyGrid(self):
        """
        Compare parallelPenaltyGrid, driven by a parameter dictionary for
        setC and setGamma, with penalties computed directly over the grid:
        the error on all fixture data minus the error on the 40 training
        examples.

        computeIdealPenalty is also evaluated at each grid point, but its
        values are not asserted.
        """
        svm = self.svm
        svm.setKernel("gaussian")
        trainX, trainY = self.X[0:40, :], self.y[0:40]

        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)

        gridShape = (svm.Cs.shape[0], svm.gammas.shape[0])
        idealPenalties2 = numpy.zeros(gridShape)
        idealPenalties3 = numpy.zeros(gridShape)

        for i, C in enumerate(svm.Cs):
            for j, gamma in enumerate(svm.gammas):
                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)

                fullError = Evaluator.binaryError(svm.predict(self.X), self.y)
                trainError = Evaluator.binaryError(svm.predict(trainX), trainY)
                idealPenalties2[i, j] = fullError - trainError

                idealPenalties3[i, j] = computeIdealPenalty((trainX, trainY, self.X, self.y, svm))

        tol = 10**-6
        difference = numpy.linalg.norm(idealPenalties2.T - idealPenalties)
        self.assertTrue(difference < tol)

    def testGetBestLearner(self):
        """
        Check the C chosen by getBestLearner when normModelSelect is enabled.

        The expected C is the grid value whose weight norm, after training on
        the full data, is closest to the mean weight norm over the folds at
        the (C, gamma) pair with the smallest entry in the error grid.
        """
        svm = self.svm
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        # Random error grid with one entry per (C, gamma) pair
        errors = numpy.random.rand(svm.getCs().shape[0], svm.getGammas().shape[0])

        folds = 5
        idx = Sampling.crossValidation(folds, self.X.shape[0])

        svm.normModelSelect = True
        svm.setKernel("gaussian")
        learner = svm.getBestLearner(errors, paramDict, self.X, self.y, idx)

        bestC = learner.getC()

        #Find the best norm
        bestInds = numpy.unravel_index(numpy.argmin(errors), errors.shape)
        learner.setC(svm.getCs()[bestInds[0]])
        learner.setGamma(svm.getGammas()[bestInds[1]])

        # Weight norm on the training part of each fold at the best grid point
        norms = []
        for trainInds, testInds in idx:
            validX = self.X[trainInds, :]
            validY = self.y[trainInds]
            learner.learnModel(validX, validY)

            norms.append(learner.weightNorm())

        bestNorm = numpy.array(norms).mean()

        # Weight norm on the full data for every candidate C
        norms = numpy.zeros(paramDict["setC"].shape[0])
        for i, C in enumerate(paramDict["setC"]):
            learner.setC(C)
            learner.learnModel(self.X, self.y)
            norms[i] = learner.weightNorm()

        bestC2 = paramDict["setC"][numpy.abs(norms-bestNorm).argmin()]

        # assertEquals is a deprecated alias that newer unittest versions drop
        self.assertEqual(bestC, bestC2)