Example #1
    def profileLearnModel(self):
        #Profile parallel stochastic gradient descent
        X, U, V = DatasetUtils.syntheticDataset1(u=0.01, m=1000, n=2000)
        #X, U, V = DatasetUtils.syntheticDataset1()
        #X, U, V = DatasetUtils.syntheticDataset1(u=0.2, sd=0.2)
        #X = DatasetUtils.flixster()

        u = 0.2
        w = 1 - u
        eps = 10**-6
        alpha = 0.5
        maxLocalAuc = MaxLocalAUC(self.k,
                                  w,
                                  alpha=alpha,
                                  eps=eps,
                                  stochastic=True)
        maxLocalAuc.maxNormU = 10
        maxLocalAuc.maxNormV = 10
        maxLocalAuc.maxIterations = 100
        maxLocalAuc.initialAlg = "rand"
        maxLocalAuc.rate = "constant"
        maxLocalAuc.parallelSGD = True
        maxLocalAuc.numProcesses = 8
        maxLocalAuc.numAucSamples = 10
        maxLocalAuc.numRowSamples = 30
        maxLocalAuc.scaleAlpha = False
        maxLocalAuc.loss = "hinge"
        maxLocalAuc.validationUsers = 0.0
        print(maxLocalAuc)

        ProfileUtils.profile('maxLocalAuc.learnModel(X)', globals(), locals())
Example #2
    def testOverfit(self):
        """
        See if we can get a zero objective on the hinge loss 
        """
        m = 10
        n = 20
        k = 5

        u = 0.5
        w = 1 - u
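        # Random m x n sparse binary matrix with k underlying factors (csarray format)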
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

        eps = 0.001
        k = 10
        maxLocalAuc = MaxLocalAUC(k, u, eps=eps, stochastic=True)
        maxLocalAuc.rate = "constant"
        maxLocalAuc.maxIterations = 500
        maxLocalAuc.numProcesses = 1
        maxLocalAuc.loss = "hinge"
        maxLocalAuc.validationUsers = 0
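        # No regularisation, so the model should be able to drive the training hinge objective to zero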
        maxLocalAuc.lmbda = 0

        print("Overfit example")
        U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(
            X, verbose=True)

        self.assertAlmostEqual(trainMeasures[-1, 0], 0, 3)
Example #3
    def testScale(self):
        """
        Look at the scales of the unnormalised gradients. 
        """

        m = 100
        n = 400
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        w = 0.1
        eps = 0.001
        learner = MaxAUCTanh(k, w)
        learner.normalise = False
        learner.lmbdaU = 1.0
        learner.lmbdaV = 1.0
        learner.rho = 1.0
        learner.numAucSamples = 100

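        # indPtr/colInds: CSR-style pointers giving the nonzero column indices of each row of X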
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        r = numpy.random.rand(m)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        gi = numpy.random.rand(m)
        gi /= gi.sum()
        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        permutedRowInds = numpy.array(numpy.random.permutation(m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n),
                                      numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

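        # Average the per-row gradient norms for U, then the per-column gradient norms for V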
        normDui = 0
        for i in range(m):
            du = learner.derivativeUi(indPtr, colInds, U, V, r, gi, gp, gq, i)
            normDui += numpy.linalg.norm(du)

        normDui /= float(m)
        print(normDui)

        normDvi = 0

        for i in range(n):
            dv = learner.derivativeVi(indPtr, colInds, U, V, r, gi, gp, gq, i)
            normDvi += numpy.linalg.norm(dv)

        normDvi /= float(n)
        print(normDvi)
Example #4
    def testLearningRateSelect(self):
        m = 10
        n = 20
        k = 5

        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

        eps = 0.001
        maxLocalAuc = MaxLocalAUC(k, u, eps=eps, stochastic=True)
        maxLocalAuc.rate = "optimal"
        maxLocalAuc.maxIterations = 5
        maxLocalAuc.numProcesses = 1

        maxLocalAuc.learningRateSelect(X)
Example #5
    def testLearnModel(self):
        m = 50
        n = 20
        k = 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        u = 0.1
        w = 1 - u
        eps = 0.05

        maxLocalAuc = MaxLocalAUC(k, w, alpha=5.0, eps=eps, stochastic=False)
        U, V = maxLocalAuc.learnModel(X)

        maxLocalAuc.stochastic = True
        U, V = maxLocalAuc.learnModel(X)

        #Test case where we do not have validation set
        maxLocalAuc.validationUsers = 0.0
        U, V = maxLocalAuc.learnModel(X)
Example #6
    def testModelSelectMaxNorm(self):
        m = 10
        n = 20
        k = 5

        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

        os.system('taskset -p 0xffffffff %d' % os.getpid())

        eps = 0.001
        k = 5
        maxLocalAuc = MaxLocalAUC(k, w, eps=eps, stochastic=True)
        maxLocalAuc.maxIterations = 5
        maxLocalAuc.recordStep = 1
        maxLocalAuc.validationSize = 3
        maxLocalAuc.metric = "f1"

        maxLocalAuc.modelSelectNorm(X)
Example #7
    def profileDerivativeVjApprox(self):
        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)

        gp = numpy.random.rand(self.n)
        gp /= gp.sum()
        gq = numpy.random.rand(self.n)
        gq /= gq.sum()

        j = 3
        numRowSamples = 100
        numAucSamples = 10

        permutedRowInds = numpy.array(numpy.random.permutation(self.m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(self.n),
                                      numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w=0.9)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq,
                                                    self.m)

        lmbda = 0.001
        normalise = True

        learner = MaxLocalAUCCython()

        def run():
            numRuns = 1
            for i in range(numRuns):
                for j in range(self.n):
                    # Profile the approximate derivative of the objective with respect to each column j of V
                    learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq,
                                               normGp, normGq, permutedRowInds,
                                               permutedColInds, j)

        ProfileUtils.profile('run()', globals(), locals())
Example #8
    def profileLearnModel2(self):
        #Profile stochastic case
        #X = DatasetUtils.flixster()
        #X = Sampling.sampleUsers(X, 1000)
        X, U, V = DatasetUtils.syntheticDataset1(u=0.001, m=10000, n=1000)

        rho = 0.00
        u = 0.2
        w = 1 - u
        eps = 10**-6
        alpha = 0.5
        k = self.k
        maxLocalAuc = MaxLocalAUC(k, w, alpha=alpha, eps=eps, stochastic=True)
        maxLocalAuc.numRowSamples = 2
        maxLocalAuc.numAucSamples = 10
        maxLocalAuc.maxIterations = 1
        maxLocalAuc.numRecordAucSamples = 100
        maxLocalAuc.recordStep = 10
        maxLocalAuc.initialAlg = "rand"
        maxLocalAuc.rate = "optimal"
        #maxLocalAuc.parallelSGD = True

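        # Split the observed entries into train/test folds; each element of trainTestX is a (trainX, testX) pair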
        trainTestX = Sampling.shuffleSplitRows(X, maxLocalAuc.folds, 5)
        trainX, testX = trainTestX[0]

        def run():
            U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(
                trainX, True)
            #logging.debug("Train Precision@5=" + str(MCEvaluator.precisionAtK(trainX, U, V, 5)))
            #logging.debug("Train Precision@10=" + str(MCEvaluator.precisionAtK(trainX, U, V, 10)))
            #logging.debug("Train Precision@20=" + str(MCEvaluator.precisionAtK(trainX, U, V, 20)))
            #logging.debug("Train Precision@50=" + str(MCEvaluator.precisionAtK(trainX, U, V, 50)))

            #logging.debug("Test Precision@5=" + str(MCEvaluator.precisionAtK(testX, U, V, 5)))
            #logging.debug("Test Precision@10=" + str(MCEvaluator.precisionAtK(testX, U, V, 10)))
            #logging.debug("Test Precision@20=" + str(MCEvaluator.precisionAtK(testX, U, V, 20)))
            #logging.debug("Test Precision@50=" + str(MCEvaluator.precisionAtK(testX, U, V, 50)))

        ProfileUtils.profile('run()', globals(), locals())
Example #9
    def testParallelLearnModel(self):
        numpy.random.seed(21)
        m = 500
        n = 200
        k = 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        from wallhack.rankingexp.DatasetUtils import DatasetUtils
        X, U, V = DatasetUtils.syntheticDataset1()

        u = 0.1
        w = 1 - u
        eps = 0.05
        maxLocalAuc = MaxLocalAUC(k, w, alpha=1.0, eps=eps, stochastic=True)
        maxLocalAuc.maxIterations = 3
        maxLocalAuc.recordStep = 1
        maxLocalAuc.rate = "optimal"
        maxLocalAuc.t0 = 2.0
        maxLocalAuc.validationUsers = 0.0
        maxLocalAuc.numProcesses = 4

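        # Reset CPU affinity to all cores (works around BLAS builds that pin the process to one core on import)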
        os.system('taskset -p 0xffffffff %d' % os.getpid())
        print(X.nnz / maxLocalAuc.numAucSamples)
        U, V = maxLocalAuc.parallelLearnModel(X)
Example #10
    def testDerivativeViApprox(self):
        """
        We'll test the case in which we approximate the derivative using a large 
        number of AUC samples and check that we get close to the exact derivative. 
        """
        m = 20
        n = 30
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        for i in range(m):
            X[i, 0] = 1
            X[i, 1] = 0

        w = 0.1
        eps = 0.001
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.numAucSamples = n

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        permutedRowInds = numpy.array(numpy.random.permutation(m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n),
                                      numpy.uint32)

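        # normGp/normGq: normalisation terms derived from the item weights gp and gq, required by derivativeViApprox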
        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

        numRuns = 200
        numTests = 5

        #Let's compare against using the exact derivative
        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)
            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

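            # dv3: central finite-difference estimate of the gradient of the objective w.r.t. V[i, :], as an independent check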
            dv3 = numpy.zeros(k)
            for j in range(k):
                eps = 10**-6
                tempV = V.copy()
                tempV[i, j] += eps
                obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U,
                                         tempV, gp, gq)

                tempV = V.copy()
                tempV[i, j] -= eps
                obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U,
                                         tempV, gp, gq)

                dv3[j] = (obj1 - obj2) / (2 * eps)

            print(dv1, dv2, dv3)

            nptst.assert_array_almost_equal(dv1, dv2, 3)

        learner.lmbdaV = 0.5
        learner.rho = 0.5

        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        learner.numRowSamples = 10
        numRuns = 1000

        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        learner.numRowSamples = m
        learner.numAucSamples = 20
        learner.lmbdaV = 0
        numRuns = 1000
        print("Final test")

        #for i in numpy.random.permutation(m)[0:numTests]:
        for i in range(m):
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            #dv1 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

            print(i, dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)
Example #11
    def testCopy(self):
        u = 0.1
        eps = 0.001
        k = 10
        maxLocalAuc = MaxLocalAUC(k, u, alpha=5.0, eps=eps)
        maxLocalAuc.copy()
Example #12
    def testStr(self):
        k = 10
        u = 0.1
        eps = 0.001
        maxLocalAuc = MaxLocalAUC(k, u, eps=eps)