Пример #1
0
    def profileLearnModel(self):
        #Profile full gradient descent
        X, U, V = DatasetUtils.syntheticDataset1(u=0.01, m=1000, n=2000)
        #X, U, V = DatasetUtils.syntheticDataset1()
        #X, U, V = DatasetUtils.syntheticDataset1(u=0.2, sd=0.2)
        #X = DatasetUtils.flixster()

        u = 0.2
        w = 1 - u
        eps = 10**-6
        alpha = 0.5
        maxLocalAuc = MaxLocalAUC(self.k,
                                  w,
                                  alpha=alpha,
                                  eps=eps,
                                  stochastic=True)
        maxLocalAuc.maxNormU = 10
        maxLocalAuc.maxNormV = 10
        maxLocalAuc.maxIterations = 100
        maxLocalAuc.initialAlg = "rand"
        maxLocalAuc.rate = "constant"
        maxLocalAuc.parallelSGD = True
        maxLocalAuc.numProcesses = 8
        maxLocalAuc.numAucSamples = 10
        maxLocalAuc.numRowSamples = 30
        maxLocalAuc.scaleAlpha = False
        maxLocalAuc.loss = "hinge"
        maxLocalAuc.validationUsers = 0.0
        print(maxLocalAuc)

        ProfileUtils.profile('maxLocalAuc.learnModel(X)', globals(), locals())
Пример #2
0
    def profileLearnModel2(self):
        #Profile stochastic case
        #X = DatasetUtils.flixster()
        #X = Sampling.sampleUsers(X, 1000)
        X, U, V = DatasetUtils.syntheticDataset1(u=0.001, m=10000, n=1000)

        rho = 0.00
        u = 0.2
        w = 1 - u
        eps = 10**-6
        alpha = 0.5
        k = self.k
        maxLocalAuc = MaxLocalAUC(k, w, alpha=alpha, eps=eps, stochastic=True)
        maxLocalAuc.numRowSamples = 2
        maxLocalAuc.numAucSamples = 10
        maxLocalAuc.maxIterations = 1
        maxLocalAuc.numRecordAucSamples = 100
        maxLocalAuc.recordStep = 10
        maxLocalAuc.initialAlg = "rand"
        maxLocalAuc.rate = "optimal"
        #maxLocalAuc.parallelSGD = True

        trainTestX = Sampling.shuffleSplitRows(X, maxLocalAuc.folds, 5)
        trainX, testX = trainTestX[0]

        def run():
            U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(
                trainX, True)
            #logging.debug("Train Precision@5=" + str(MCEvaluator.precisionAtK(trainX, U, V, 5)))
            #logging.debug("Train Precision@10=" + str(MCEvaluator.precisionAtK(trainX, U, V, 10)))
            #logging.debug("Train Precision@20=" + str(MCEvaluator.precisionAtK(trainX, U, V, 20)))
            #logging.debug("Train Precision@50=" + str(MCEvaluator.precisionAtK(trainX, U, V, 50)))

            #logging.debug("Test Precision@5=" + str(MCEvaluator.precisionAtK(testX, U, V, 5)))
            #logging.debug("Test Precision@10=" + str(MCEvaluator.precisionAtK(testX, U, V, 10)))
            #logging.debug("Test Precision@20=" + str(MCEvaluator.precisionAtK(testX, U, V, 20)))
            #logging.debug("Test Precision@50=" + str(MCEvaluator.precisionAtK(testX, U, V, 50)))

        ProfileUtils.profile('run()', globals(), locals())
Пример #3
0
    def testDerivativeViApprox(self): 
        """
        We'll test the case in which we apprormate using a large number of samples 
        for the AUC and see if we get close to the exact derivative 
        """
        m = 20 
        n = 30 
        k = 3 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)
        
        for i in range(m):
            X[i, 0] = 1
            X[i, 1] = 0
        
        w = 0.1
        eps = 0.001
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.numAucSamples = n
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)
             
        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()     
        
        permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)
        
        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)
        
        numRuns = 200 
        numTests = 5

        #Let's compare against using the exact derivative 
        for i in numpy.random.permutation(m)[0:numTests]: 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)   
            
            
            dv3 = numpy.zeros(k)
            for j in range(k): 
                eps = 10**-6
                tempV = V.copy() 
                tempV[i,j] += eps
                obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                
                tempV = V.copy() 
                tempV[i,j] -= eps
                obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                
                dv3[j] = (obj1-obj2)/(2*eps)            
            
            print(dv1, dv2, dv3)
            
            nptst.assert_array_almost_equal(dv1, dv2, 3)
            
        learner.lmbdaV = 0.5 
        learner.rho = 0.5
        
        for i in numpy.random.permutation(m)[0:numTests]: 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
    
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V,  gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i) 
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)
            
        learner.numRowSamples = 10 
        numRuns = 1000
        
        for i in numpy.random.permutation(m)[0:numTests]: 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
            
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)  
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        maxLocalAuc.numRowSamples = m 
        maxLocalAuc.numAucSamples = 20 
        maxLocalAuc.lmbdaV = 0
        numRuns = 1000
        print("Final test")
        
        #for i in numpy.random.permutation(m)[0:numTests]: 
        for i in range(m): 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
            
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            #dv1 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i) 
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)   
                      
            
            print(i, dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)
Пример #4
0
w2 = 1-u2
eps = 10**-4
lmbda = 0.0
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps, lmbdaU=lmbda, lmbdaV=lmbda, stochastic=True)
maxLocalAuc.alpha = 0.05
maxLocalAuc.alphas = 2.0**-numpy.arange(0, 5, 1)
maxLocalAuc.folds = 1
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([k2])
maxLocalAuc.lmbdas = numpy.linspace(0.5, 2.0, 7)
maxLocalAuc.maxIterations = 500
maxLocalAuc.metric = "f1"
maxLocalAuc.normalise = True
maxLocalAuc.numAucSamples = 10
maxLocalAuc.numProcesses = 1
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.numRowSamples = 30
maxLocalAuc.rate = "constant"
maxLocalAuc.recordStep = 10
maxLocalAuc.rho = 1.0
maxLocalAuc.t0 = 1.0
maxLocalAuc.t0s = 2.0**-numpy.arange(7, 12, 1)
maxLocalAuc.validationSize = 3
maxLocalAuc.validationUsers = 0

os.system('taskset -p 0xffffffff %d' % os.getpid())

logging.debug("Starting training")
losses = [("tanh", 0.25), ("tanh", 0.5), ("tanh", 1.0), ("tanh", 2.0), ("hinge", 1), ("square", 1), ("logistic", 0.5), ("logistic", 1.0), ("logistic", 2.0), ("sigmoid", 0.5), ("sigmoid", 1.0), ("sigmoid", 2.0)]
Пример #5
0
    def testDerivativeViApprox(self):
        """
        We'll test the case in which we apprormate using a large number of samples 
        for the AUC and see if we get close to the exact derivative 
        """
        m = 20
        n = 30
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        for i in range(m):
            X[i, 0] = 1
            X[i, 1] = 0

        w = 0.1
        eps = 0.001
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.numAucSamples = n

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        permutedRowInds = numpy.array(numpy.random.permutation(m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n),
                                      numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

        numRuns = 200
        numTests = 5

        #Let's compare against using the exact derivative
        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)
            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

            dv3 = numpy.zeros(k)
            for j in range(k):
                eps = 10**-6
                tempV = V.copy()
                tempV[i, j] += eps
                obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U,
                                         tempV, gp, gq)

                tempV = V.copy()
                tempV[i, j] -= eps
                obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U,
                                         tempV, gp, gq)

                dv3[j] = (obj1 - obj2) / (2 * eps)

            print(dv1, dv2, dv3)

            nptst.assert_array_almost_equal(dv1, dv2, 3)

        learner.lmbdaV = 0.5
        learner.rho = 0.5

        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        learner.numRowSamples = 10
        numRuns = 1000

        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        maxLocalAuc.numRowSamples = m
        maxLocalAuc.numAucSamples = 20
        maxLocalAuc.lmbdaV = 0
        numRuns = 1000
        print("Final test")

        #for i in numpy.random.permutation(m)[0:numTests]:
        for i in range(m):
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            #dv1 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

            print(i, dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)