Exemplo n.º 1
0
    def profileDerivativeUiApprox(self):
        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)

        gp = numpy.random.rand(self.n)
        gp /= gp.sum()
        gq = numpy.random.rand(self.n)
        gq /= gq.sum()

        j = 3
        numRowSamples = 100
        numAucSamples = 10

        permutedRowInds = numpy.array(numpy.random.permutation(self.m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(self.n), numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w=0.9)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, self.m)

        lmbda = 0.001
        normalise = True

        learner = MaxLocalAUCCython()

        def run():
            numRuns = 10
            for j in range(numRuns):
                for i in range(self.m):
                    learner.derivativeUiApprox(indPtr, colInds, U, V, gp, gq, permutedColInds, i)

        ProfileUtils.profile("run()", globals(), locals())
Exemplo n.º 2
0
    def testScale(self):
        """
        Look at the scales of the unnormalised gradients. 
        """

        m = 100
        n = 400
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        w = 0.1
        eps = 0.001
        learner = MaxAUCTanh(k, w)
        learner.normalise = False
        learner.lmbdaU = 1.0
        learner.lmbdaV = 1.0
        learner.rho = 1.0
        learner.numAucSamples = 100

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        r = numpy.random.rand(m)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        gi = numpy.random.rand(m)
        gi /= gi.sum()
        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        permutedRowInds = numpy.array(numpy.random.permutation(m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n),
                                      numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

        normDui = 0
        for i in range(m):
            du = learner.derivativeUi(indPtr, colInds, U, V, r, gi, gp, gq, i)
            normDui += numpy.linalg.norm(du)

        normDui /= float(m)
        print(normDui)

        normDvi = 0

        for i in range(n):
            dv = learner.derivativeVi(indPtr, colInds, U, V, r, gi, gp, gq, i)
            normDvi += numpy.linalg.norm(dv)

        normDvi /= float(n)
        print(normDvi)
Exemplo n.º 3
0
    def testOverfit(self):
        """
        See if we can get a zero objective on the hinge loss 
        """
        m = 10
        n = 20
        k = 5

        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

        eps = 0.001
        k = 10
        maxLocalAuc = MaxLocalAUC(k, u, eps=eps, stochastic=True)
        maxLocalAuc.rate = "constant"
        maxLocalAuc.maxIterations = 500
        maxLocalAuc.numProcesses = 1
        maxLocalAuc.loss = "hinge"
        maxLocalAuc.validationUsers = 0
        maxLocalAuc.lmbda = 0

        print("Overfit example")
        U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(
            X, verbose=True)

        self.assertAlmostEquals(trainMeasures[-1, 0], 0, 3)
Exemplo n.º 4
0
    def testScale(self): 
        """
        Look at the scales of the unnormalised gradients. 
        """        
        
        m = 100 
        n = 400 
        k = 3 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)
        
        w = 0.1
        eps = 0.001
        learner = MaxAUCTanh(k, w)
        learner.normalise = False
        learner.lmbdaU = 1.0
        learner.lmbdaV = 1.0
        learner.rho = 1.0
        learner.numAucSamples = 100
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        r = numpy.random.rand(m)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)
        
        gi = numpy.random.rand(m)
        gi /= gi.sum()        
        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()     
        
        permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)
        
        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)
        
        normDui = 0
        for i in range(m): 
            du = learner.derivativeUi(indPtr, colInds, U, V, r, gi, gp, gq, i) 
            normDui += numpy.linalg.norm(du)
            
        normDui /= float(m)
        print(normDui)        
        
        normDvi = 0         
        
        for i in range(n): 
            dv = learner.derivativeVi(indPtr, colInds, U, V, r, gi, gp, gq, i) 
            normDvi += numpy.linalg.norm(dv)
            
        normDvi /= float(n)
        print(normDvi)
Exemplo n.º 5
0
    def testParallelLearnModel(self): 
        numpy.random.seed(21)
        m = 500 
        n = 200 
        k = 5 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)
        
        from wallhack.rankingexp.DatasetUtils import DatasetUtils
        X, U, V = DatasetUtils.syntheticDataset1()

        
        u = 0.1
        w = 1-u
        eps = 0.05
        maxLocalAuc = MaxLocalAUC(k, w, alpha=1.0, eps=eps, stochastic=True)
        maxLocalAuc.maxIterations = 3
        maxLocalAuc.recordStep = 1
        maxLocalAuc.rate = "optimal"
        maxLocalAuc.t0 = 2.0
        maxLocalAuc.validationUsers = 0.0
        maxLocalAuc.numProcesses = 4
        
        os.system('taskset -p 0xffffffff %d' % os.getpid())
        print(X.nnz/maxLocalAuc.numAucSamples)
        U, V = maxLocalAuc.parallelLearnModel(X)
Exemplo n.º 6
0
 def testOverfit(self): 
     """
     See if we can get a zero objective on the hinge loss 
     """
     m = 10 
     n = 20 
     k = 5 
     
     u = 0.5
     w = 1-u
     X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)
     
     eps = 0.001
     k = 10
     maxLocalAuc = MaxLocalAUC(k, u, eps=eps, stochastic=True)
     maxLocalAuc.rate = "constant"
     maxLocalAuc.maxIterations = 500
     maxLocalAuc.numProcesses = 1
     maxLocalAuc.loss = "hinge"
     maxLocalAuc.validationUsers = 0
     maxLocalAuc.lmbda = 0        
     
     print("Overfit example")
     U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(X, verbose=True)
     
     self.assertAlmostEquals(trainMeasures[-1, 0], 0, 3)
Exemplo n.º 7
0
    def testModelSelectLmbda(self):
        m = 10
        n = 20
        k = 5

        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

        os.system('taskset -p 0xffffffff %d' % os.getpid())

        eps = 0.001
        k = 5
        maxLocalAuc = MaxLocalAUC(k, w, eps=eps, stochastic=True)
        maxLocalAuc.maxIterations = 5
        maxLocalAuc.recordStep = 1
        maxLocalAuc.validationSize = 3
        maxLocalAuc.metric = "f1"
        maxLocalAuc.rate = "constant"
        maxLocalAuc.ks = numpy.array([4, 8])
        maxLocalAuc.modelSelectLmbda(X)
    def profileDerivativeVjApprox(self):
        k = 10
        U = numpy.random.rand(self.m, k)
        V = numpy.random.rand(self.n, k)

        indPtr, colInds = SparseUtils.getOmegaListPtr(self.X)

        gp = numpy.random.rand(self.n)
        gp /= gp.sum()
        gq = numpy.random.rand(self.n)
        gq /= gq.sum()

        j = 3
        numRowSamples = 100
        numAucSamples = 10

        permutedRowInds = numpy.array(numpy.random.permutation(self.m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(self.n),
                                      numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w=0.9)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq,
                                                    self.m)

        lmbda = 0.001
        normalise = True

        learner = MaxLocalAUCCython()

        def run():
            numRuns = 1
            for i in range(numRuns):
                for j in range(self.n):
                    learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq,
                                               normGp, normGq, permutedRowInds,
                                               permutedColInds, i)

        ProfileUtils.profile('run()', globals(), locals())
Exemplo n.º 9
0
 def testModelSelectLmbda(self): 
     m = 10 
     n = 20 
     k = 5 
     
     u = 0.5
     w = 1-u
     X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)
     
     os.system('taskset -p 0xffffffff %d' % os.getpid())
     
     eps = 0.001
     k = 5
     maxLocalAuc = MaxLocalAUC(k, w, eps=eps, stochastic=True)
     maxLocalAuc.maxIterations = 5
     maxLocalAuc.recordStep = 1
     maxLocalAuc.validationSize = 3
     maxLocalAuc.metric = "f1"
     maxLocalAuc.rate = "constant"
     maxLocalAuc.ks = numpy.array([4, 8])
     maxLocalAuc.modelSelectLmbda(X)
Exemplo n.º 10
0
    def profileLearnModel2(self):
        #Profile stochastic case
        #X = DatasetUtils.flixster()
        #X = Sampling.sampleUsers(X, 1000)
        X, U, V = DatasetUtils.syntheticDataset1(u=0.001, m=10000, n=1000)

        rho = 0.00
        u = 0.2
        w = 1 - u
        eps = 10**-6
        alpha = 0.5
        k = self.k
        maxLocalAuc = MaxLocalAUC(k, w, alpha=alpha, eps=eps, stochastic=True)
        maxLocalAuc.numRowSamples = 2
        maxLocalAuc.numAucSamples = 10
        maxLocalAuc.maxIterations = 1
        maxLocalAuc.numRecordAucSamples = 100
        maxLocalAuc.recordStep = 10
        maxLocalAuc.initialAlg = "rand"
        maxLocalAuc.rate = "optimal"
        #maxLocalAuc.parallelSGD = True

        trainTestX = Sampling.shuffleSplitRows(X, maxLocalAuc.folds, 5)
        trainX, testX = trainTestX[0]

        def run():
            U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(
                trainX, True)
            #logging.debug("Train Precision@5=" + str(MCEvaluator.precisionAtK(trainX, U, V, 5)))
            #logging.debug("Train Precision@10=" + str(MCEvaluator.precisionAtK(trainX, U, V, 10)))
            #logging.debug("Train Precision@20=" + str(MCEvaluator.precisionAtK(trainX, U, V, 20)))
            #logging.debug("Train Precision@50=" + str(MCEvaluator.precisionAtK(trainX, U, V, 50)))

            #logging.debug("Test Precision@5=" + str(MCEvaluator.precisionAtK(testX, U, V, 5)))
            #logging.debug("Test Precision@10=" + str(MCEvaluator.precisionAtK(testX, U, V, 10)))
            #logging.debug("Test Precision@20=" + str(MCEvaluator.precisionAtK(testX, U, V, 20)))
            #logging.debug("Test Precision@50=" + str(MCEvaluator.precisionAtK(testX, U, V, 50)))

        ProfileUtils.profile('run()', globals(), locals())
Exemplo n.º 11
0
    def testLearningRateSelect(self):
        m = 10
        n = 20
        k = 5

        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

        eps = 0.001
        maxLocalAuc = MaxLocalAUC(k, u, eps=eps, stochastic=True)
        maxLocalAuc.rate = "optimal"
        maxLocalAuc.maxIterations = 5
        maxLocalAuc.numProcesses = 1

        maxLocalAuc.learningRateSelect(X)
Exemplo n.º 12
0
    def testParallelLearnModel(self):
        numpy.random.seed(21)
        m = 500
        n = 200
        k = 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        from wallhack.rankingexp.DatasetUtils import DatasetUtils
        X, U, V = DatasetUtils.syntheticDataset1()

        u = 0.1
        w = 1 - u
        eps = 0.05
        maxLocalAuc = MaxLocalAUC(k, w, alpha=1.0, eps=eps, stochastic=True)
        maxLocalAuc.maxIterations = 3
        maxLocalAuc.recordStep = 1
        maxLocalAuc.rate = "optimal"
        maxLocalAuc.t0 = 2.0
        maxLocalAuc.validationUsers = 0.0
        maxLocalAuc.numProcesses = 4

        os.system('taskset -p 0xffffffff %d' % os.getpid())
        print(X.nnz / maxLocalAuc.numAucSamples)
        U, V = maxLocalAuc.parallelLearnModel(X)
Exemplo n.º 13
0
    def testLearnModel(self):
        m = 50
        n = 20
        k = 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        u = 0.1
        w = 1 - u
        eps = 0.05

        maxLocalAuc = MaxLocalAUC(k, w, alpha=5.0, eps=eps, stochastic=False)
        U, V = maxLocalAuc.learnModel(X)

        maxLocalAuc.stochastic = True
        U, V = maxLocalAuc.learnModel(X)

        #Test case where we do not have validation set
        maxLocalAuc.validationUsers = 0.0
        U, V = maxLocalAuc.learnModel(X)
Exemplo n.º 14
0
 def testLearningRateSelect(self): 
     m = 10 
     n = 20 
     k = 5 
     
     u = 0.5
     w = 1-u
     X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)
     
     eps = 0.001
     maxLocalAuc = MaxLocalAUC(k, u, eps=eps, stochastic=True)
     maxLocalAuc.rate = "optimal"
     maxLocalAuc.maxIterations = 5
     maxLocalAuc.numProcesses = 1
     
     maxLocalAuc.learningRateSelect(X)
Exemplo n.º 15
0
    def testLearnModel(self): 
        m = 50 
        n = 20 
        k = 5 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        u = 0.1
        w = 1-u
        eps = 0.05
        
        maxLocalAuc = MaxLocalAUC(k, w, alpha=5.0, eps=eps, stochastic=False)
        U, V = maxLocalAuc.learnModel(X)
        
        maxLocalAuc.stochastic = True 
        U, V = maxLocalAuc.learnModel(X)
        
        #Test case where we do not have validation set 
        maxLocalAuc.validationUsers = 0.0
        U, V = maxLocalAuc.learnModel(X)
Exemplo n.º 16
0
    dataset = "synthetic"

saveResults = True
prefix = "Exp2"
outputFile = PathDefaults.getOutputDir() + "ranking/" + prefix + dataset.title() + "Results.npz" 
X = DatasetUtils.getDataset(dataset)
        
testSize = 5
folds = 2
trainTestXs = Sampling.shuffleSplitRows(X, folds, testSize)

u = 0.1
w2 = 1-u 
k = 64
eps = 10**-8
maxLocalAuc = MaxLocalAUC(k, w2, eps=eps, stochastic=True)
maxLocalAuc.maxIterations = 50
maxLocalAuc.numRowSamples = 30
maxLocalAuc.numAucSamples = 10
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.recordStep = 10
maxLocalAuc.rate = "optimal"
maxLocalAuc.alpha = 1.0
maxLocalAuc.t0 = 0.1
maxLocalAuc.lmbdaU = 0.0
maxLocalAuc.lmbdaV = 1.0
maxLocalAuc.rho = 0.5

maxItems = 10
chunkSize = 1
startAverages = numpy.array([2, 5, 10, 20, 30, 40])
Exemplo n.º 17
0
X = DatasetUtils.getDataset(dataset)
    
m, n = X.shape

testSize = 5
trainTestXs = Sampling.shuffleSplitRows(X, 1, testSize)
trainX, testX = trainTestXs[0]

logging.debug("Number of non-zero elements: " + str((trainX.nnz, testX.nnz)))

k2 = 32
u2 = 0.1
w2 = 1-u2
eps = 10**-8
lmbda = 1.0
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps, lmbdaU=0.0, lmbdaV=lmbda, stochastic=True)
maxLocalAuc.alpha = 1.0
maxLocalAuc.alphas = 2.0**-numpy.arange(-5, 5, 1)
maxLocalAuc.folds = 5
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 1.0
maxLocalAuc.itemExpQ = 1.0
maxLocalAuc.lmbdas = numpy.linspace(0.5, 2.0, 7)
maxLocalAuc.maxIterations = 100
maxLocalAuc.metric = "f1"
maxLocalAuc.normalise = True
maxLocalAuc.numAucSamples = 10
#maxLocalAuc.numProcesses = 1
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.numRowSamples = 30
maxLocalAuc.rate = "optimal"
Exemplo n.º 18
0
m, n = X.shape
u = 0.1 
w = 1-u

testSize = 5
folds = 5
trainTestXs = Sampling.shuffleSplitRows(X, folds, testSize)

numRecordAucSamples = 200

k2 = 8
u2 = 0.5
w2 = 1-u2
eps = 10**-8
lmbda = 0.1
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps, lmbdaU=lmbda, lmbdaV=lmbda, stochastic=True)
maxLocalAuc.maxIterations = 100
maxLocalAuc.numRowSamples = 30
maxLocalAuc.numAucSamples = 10
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.recordStep = 10
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.rate = "constant"
maxLocalAuc.alpha = 0.1
maxLocalAuc.t0 = 1.0
maxLocalAuc.folds = 1
maxLocalAuc.rho = 1.0
maxLocalAuc.ks = numpy.array([k2])
maxLocalAuc.validationSize = 3
maxLocalAuc.lmbdas = numpy.linspace(0.5, 2.0, 7)
maxLocalAuc.normalise = True
Exemplo n.º 19
0
    def testDerivativeViApprox(self):
        """
        We'll test the case in which we apprormate using a large number of samples 
        for the AUC and see if we get close to the exact derivative 
        """
        m = 20
        n = 30
        k = 3
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        for i in range(m):
            X[i, 0] = 1
            X[i, 1] = 0

        w = 0.1
        eps = 0.001
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.numAucSamples = n

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        gp = numpy.random.rand(n)
        gp /= gp.sum()
        gq = numpy.random.rand(n)
        gq /= gq.sum()

        permutedRowInds = numpy.array(numpy.random.permutation(m),
                                      numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n),
                                      numpy.uint32)

        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

        numRuns = 200
        numTests = 5

        #Let's compare against using the exact derivative
        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)
            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

            dv3 = numpy.zeros(k)
            for j in range(k):
                eps = 10**-6
                tempV = V.copy()
                tempV[i, j] += eps
                obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U,
                                         tempV, gp, gq)

                tempV = V.copy()
                tempV[i, j] -= eps
                obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U,
                                         tempV, gp, gq)

                dv3[j] = (obj1 - obj2) / (2 * eps)

            print(dv1, dv2, dv3)

            nptst.assert_array_almost_equal(dv1, dv2, 3)

        learner.lmbdaV = 0.5
        learner.rho = 0.5

        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        learner.numRowSamples = 10
        numRuns = 1000

        for i in numpy.random.permutation(m)[0:numTests]:
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        maxLocalAuc.numRowSamples = m
        maxLocalAuc.numAucSamples = 20
        maxLocalAuc.lmbdaV = 0
        numRuns = 1000
        print("Final test")

        #for i in numpy.random.permutation(m)[0:numTests]:
        for i in range(m):
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)

            dv1 = numpy.zeros(k)
            for j in range(numRuns):
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp,
                                                  gq, normGp, normGq,
                                                  permutedRowInds,
                                                  permutedColInds, i)
            dv1 /= numRuns
            #dv1 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

            print(i, dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)
Exemplo n.º 20
0
 def testCopy(self): 
     u= 0.1
     eps = 0.001
     k = 10 
     maxLocalAuc = MaxLocalAUC(k, u, alpha=5.0, eps=eps)
     maxLocalAuc.copy()
Exemplo n.º 21
0
saveResults = True
prefix = "Regularisation5"
outputFile = PathDefaults.getOutputDir() + "ranking/" + prefix + dataset.title() + "Results.npz" 
X = DatasetUtils.getDataset(dataset)

testSize = 5
trainTestXs = Sampling.shuffleSplitRows(X, 1, testSize)
trainX, testX = trainTestXs[0]

logging.debug("Number of non-zero elements: " + str((trainX.nnz, testX.nnz)))

u = 0.1
w = 1-u
k2 = 64
eps = 10**-6
maxLocalAuc = MaxLocalAUC(k2, w, eps=eps, stochastic=True)
maxLocalAuc.alpha = 0.1
maxLocalAuc.alphas = 2.0**-numpy.arange(0, 5, 1)
maxLocalAuc.folds = 1
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([k2])
maxLocalAuc.lmbdaU = 0.0
maxLocalAuc.lmbdaV = 0.0
maxLocalAuc.lmbdas = 2.0**-numpy.arange(0, 8)
maxLocalAuc.loss = "hinge"
maxLocalAuc.maxIterations = 500
maxLocalAuc.maxNorms = 2.0**numpy.arange(-2, 5, 0.5)
maxLocalAuc.metric = "f1"
maxLocalAuc.normalise = True
Exemplo n.º 22
0
    def profileLearnModel(self):
        #Profile full gradient descent
        X, U, V = DatasetUtils.syntheticDataset1(u=0.01, m=1000, n=2000)
        #X, U, V = DatasetUtils.syntheticDataset1()
        #X, U, V = DatasetUtils.syntheticDataset1(u=0.2, sd=0.2)
        #X = DatasetUtils.flixster()

        u = 0.2
        w = 1 - u
        eps = 10**-6
        alpha = 0.5
        maxLocalAuc = MaxLocalAUC(self.k,
                                  w,
                                  alpha=alpha,
                                  eps=eps,
                                  stochastic=True)
        maxLocalAuc.maxNormU = 10
        maxLocalAuc.maxNormV = 10
        maxLocalAuc.maxIterations = 100
        maxLocalAuc.initialAlg = "rand"
        maxLocalAuc.rate = "constant"
        maxLocalAuc.parallelSGD = True
        maxLocalAuc.numProcesses = 8
        maxLocalAuc.numAucSamples = 10
        maxLocalAuc.numRowSamples = 30
        maxLocalAuc.scaleAlpha = False
        maxLocalAuc.loss = "hinge"
        maxLocalAuc.validationUsers = 0.0
        print(maxLocalAuc)

        ProfileUtils.profile('maxLocalAuc.learnModel(X)', globals(), locals())
Exemplo n.º 23
0
 def testCopy(self):
     u = 0.1
     eps = 0.001
     k = 10
     maxLocalAuc = MaxLocalAUC(k, u, alpha=5.0, eps=eps)
     maxLocalAuc.copy()
Exemplo n.º 24
0
u = 0.1 
w = 1-u

logging.debug("Sampled X shape: " + str(X.shape))

testSize = 5
folds = 5
trainTestXs = Sampling.shuffleSplitRows(X, folds, testSize)

numRecordAucSamples = 200

k2 = 64
u2 = 0.5
w2 = 1-u2
eps = 10**-8
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps,stochastic=True)
maxLocalAuc.alpha = 0.05
maxLocalAuc.alphas = 2.0**-numpy.arange(0, 9, 1)
maxLocalAuc.folds = 1
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([k2])
maxLocalAuc.maxNorms = 2.0**numpy.arange(-2, 2)
maxLocalAuc.maxIterations = 500
maxLocalAuc.lmbdaU = 0
maxLocalAuc.lmbdaV = 0
maxLocalAuc.loss = "hinge"
maxLocalAuc.maxIterations = 500
maxLocalAuc.maxNormU = 100
maxLocalAuc.maxNormV = 100
Exemplo n.º 25
0
 def testStr(self):
     k = 10
     u = 0.1
     eps = 0.001
     maxLocalAuc = MaxLocalAUC(k, u, eps=eps)
Exemplo n.º 26
0
    dataset = "movielens"

saveResults = True
prefix = "Rademacher"
outputFile = PathDefaults.getOutputDir() + "ranking/" + prefix + dataset.title() + "Results.npz" 
X = DatasetUtils.getDataset(dataset, nnz=20000)
    

m, n = X.shape

k2 = 16
u2 = 5/float(n)
w2 = 1-u2
eps = 10**-8
lmbda = 0.0
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps, lmbdaU=lmbda, lmbdaV=lmbda, stochastic=True)
maxLocalAuc.alpha = 0.1
maxLocalAuc.alphas = 2.0**-numpy.arange(0, 5, 1)
maxLocalAuc.bound = True
maxLocalAuc.delta = 0.1
maxLocalAuc.eta = 0
maxLocalAuc.folds = 2
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([4, 8, 16, 32, 64, 128])
maxLocalAuc.lmbdas = 2.0**-numpy.arange(1, 10)
maxLocalAuc.loss = "hinge" 
maxLocalAuc.maxIterations = 100
maxLocalAuc.maxNorm = 1/numpy.sqrt(2)
maxLocalAuc.metric = "f1"
Exemplo n.º 27
0
softImpute.maxIterations = maxIterations
softImpute.metric = "f1" 
softImpute.q = 3
softImpute.p = 10
softImpute.rho = 0.1
softImpute.eps = 10**-4 
softImpute.numProcesses = args.processes

wrmf = WeightedMf(k=k, maxIterations=maxIterations, alpha=1.0)
wrmf.ks = ks
wrmf.folds = folds 
wrmf.lmbdas = 2.0**-numpy.arange(-1, 12, 2)
wrmf.metric = "f1" 
wrmf.numProcesses = args.processes

maxLocalAuc = MaxLocalAUC(k=k, w=0.9, maxIterations=50, lmbdaU=0.1, lmbdaV=0.1, stochastic=True)
maxLocalAuc.numRowSamples = 10
maxLocalAuc.parallelSGD = True
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.ks = ks
maxLocalAuc.folds = folds
maxLocalAuc.metric = "f1"
maxLocalAuc.numProcesses = args.processes

kNeighbours = 25
knn = CosineKNNRecommender(kNeighbours)

numFeatures = 200
slim = SLIM(num_selected_features=numFeatures)

learners = [("SoftImpute", softImpute), ("WRMF", wrmf), ("KNN", knn), ("MLAUC", maxLocalAuc), ("SLIM", slim)]
Exemplo n.º 28
0
    dataset = sys.argv[1]
else: 
    dataset = "movielens"

saveResults = False
prefix = "LearningRate2"
outputFile = PathDefaults.getOutputDir() + "ranking/" + prefix + dataset.title() + "Results.npz" 
X = DatasetUtils.getDataset(dataset)
m, n = X.shape

k2 = 64
u2 = 5/float(n)
w2 = 1-u2
eps = 10**-8
lmbda = 0.01
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps, lmbdaU=0.1, lmbdaV=0.1, stochastic=True)
maxLocalAuc.alpha = 0.5
maxLocalAuc.alphas = 2.0**-numpy.arange(2, 9, 2)
maxLocalAuc.beta = 2
maxLocalAuc.bound = False
maxLocalAuc.delta = 0.1
maxLocalAuc.eta = 20
maxLocalAuc.folds = 2
maxLocalAuc.initialAlg = "svd"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([4, 8, 16, 32, 64, 128])
maxLocalAuc.lmbdas = 2.0**-numpy.arange(1, 5)
maxLocalAuc.loss = "hinge" 
maxLocalAuc.maxIterations = 500
maxLocalAuc.maxNorm = 100
Exemplo n.º 29
0
trainX, testX = trainTestXs[0]

trainOmegaPtr = SparseUtils.getOmegaListPtr(trainX)
testOmegaPtr = SparseUtils.getOmegaListPtr(testX)
allOmegaPtr = SparseUtils.getOmegaListPtr(X)
numRecordAucSamples = 200

logging.debug("Number of non-zero elements: " + str((trainX.nnz, testX.nnz)))

#w = 1.0
k2 = 8
u2 = 5/float(n)
w2 = 1-u2
eps = 10**-8
lmbda = 0.01
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps, lmbdaU=0.1, lmbdaV=0.1, stochastic=True)
maxLocalAuc.alpha = 0.1
maxLocalAuc.alphas = 2.0**-numpy.arange(0, 5, 1)
maxLocalAuc.beta = 2
maxLocalAuc.bound = False
maxLocalAuc.delta = 0.1
maxLocalAuc.eta = 0
maxLocalAuc.folds = 2
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([4, 8, 16, 32, 64, 128])
maxLocalAuc.lmbdas = numpy.linspace(0.5, 2.0, 7)
maxLocalAuc.loss = "hinge" 
maxLocalAuc.maxIterations = 100
maxLocalAuc.maxNorm = 100
Exemplo n.º 30
0
m, n = X.shape
u = 0.1 
w = 1-u

testSize = 5
folds = 5
trainTestXs = Sampling.shuffleSplitRows(X, folds, testSize)

numRecordAucSamples = 200

k2 = 8
u2 = 0.5
w2 = 1-u2
eps = 10**-4
lmbda = 0.0
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps, lmbdaU=lmbda, lmbdaV=lmbda, stochastic=True)
maxLocalAuc.alpha = 0.05
maxLocalAuc.alphas = 2.0**-numpy.arange(0, 5, 1)
maxLocalAuc.folds = 1
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([k2])
maxLocalAuc.lmbdas = numpy.linspace(0.5, 2.0, 7)
maxLocalAuc.maxIterations = 500
maxLocalAuc.metric = "f1"
maxLocalAuc.normalise = True
maxLocalAuc.numAucSamples = 10
maxLocalAuc.numProcesses = 1
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.numRowSamples = 30
Exemplo n.º 31
0
"""

if len(sys.argv) > 1:
    dataset = sys.argv[1]
else: 
    dataset = "flixster"

saveResults = True
prefix = "ModelSelect"
outputFile = PathDefaults.getOutputDir() + "ranking/" + prefix + dataset.title() + "Results.npz" 

u = 0.1
w = 1-u
k2 = 64
eps = 10**-6
maxLocalAuc = MaxLocalAUC(k2, w, eps=eps, stochastic=True)
maxLocalAuc.alpha = 0.1
maxLocalAuc.alphas = 2.0**-numpy.arange(1, 7, 1)
maxLocalAuc.folds = 1
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([k2])
maxLocalAuc.lmbdas = 2.0**-numpy.arange(1, 6)
maxLocalAuc.lmbdaU = 0.25
maxLocalAuc.lmbdaV = 0.25
maxLocalAuc.loss = "hinge"
maxLocalAuc.maxIterations = 500
maxLocalAuc.maxNormU = 100
maxLocalAuc.maxNormV = 100
maxLocalAuc.metric = "f1"
Exemplo n.º 32
0
    def testDerivativeViApprox(self): 
        """
        We'll test the case in which we apprormate using a large number of samples 
        for the AUC and see if we get close to the exact derivative 
        """
        m = 20 
        n = 30 
        k = 3 
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)
        
        for i in range(m):
            X[i, 0] = 1
            X[i, 1] = 0
        
        w = 0.1
        eps = 0.001
        learner = MaxAUCSigmoid(k, w)
        learner.normalise = False
        learner.lmbdaU = 0
        learner.lmbdaV = 0
        learner.numAucSamples = n
        
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)

        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)
             
        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()     
        
        permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)
        
        maxLocalAuc = MaxLocalAUC(k, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)
        
        numRuns = 200 
        numTests = 5

        #Let's compare against using the exact derivative 
        for i in numpy.random.permutation(m)[0:numTests]: 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)   
            
            
            dv3 = numpy.zeros(k)
            for j in range(k): 
                eps = 10**-6
                tempV = V.copy() 
                tempV[i,j] += eps
                obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                
                tempV = V.copy() 
                tempV[i,j] -= eps
                obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
                
                dv3[j] = (obj1-obj2)/(2*eps)            
            
            print(dv1, dv2, dv3)
            
            nptst.assert_array_almost_equal(dv1, dv2, 3)
            
        learner.lmbdaV = 0.5 
        learner.rho = 0.5
        
        for i in numpy.random.permutation(m)[0:numTests]: 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
    
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V,  gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i) 
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)
            
        learner.numRowSamples = 10 
        numRuns = 1000
        
        for i in numpy.random.permutation(m)[0:numTests]: 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
            
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)  
            print(dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)

        maxLocalAuc.numRowSamples = m 
        maxLocalAuc.numAucSamples = 20 
        maxLocalAuc.lmbdaV = 0
        numRuns = 1000
        print("Final test")
        
        #for i in numpy.random.permutation(m)[0:numTests]: 
        for i in range(m): 
            U = numpy.random.rand(X.shape[0], k)
            V = numpy.random.rand(X.shape[1], k)            
            
            dv1 = numpy.zeros(k)
            for j in range(numRuns): 
                dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
            dv1 /= numRuns
            #dv1 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i) 
            dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)   
                      
            
            print(i, dv1, dv2)
            nptst.assert_array_almost_equal(dv1, dv2, 3)
Exemplo n.º 33
0
trainOmegaList = SparseUtils.getOmegaList(trainX)
trainOmegaPtr = SparseUtils.getOmegaListPtr(trainX)
testOmegaList = SparseUtils.getOmegaList(testX)
testOmegaPtr = SparseUtils.getOmegaListPtr(testX)
allOmegaPtr = SparseUtils.getOmegaListPtr(X)
numRecordAucSamples = 200

logging.debug("Number of non-zero elements: " + str((trainX.nnz, testX.nnz)))

k2 = 64
u2 = 5/float(n)
w2 = 1-u2
eps = 10**-8
lmbda = 0.01
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps, lmbdaU=0.1, lmbdaV=0.1, stochastic=True)
maxLocalAuc.alpha = 32
maxLocalAuc.alphas = 2.0**-numpy.arange(0, 5, 1)
maxLocalAuc.beta = 2
maxLocalAuc.bound = False
maxLocalAuc.delta = 0.1
maxLocalAuc.eta = 0
maxLocalAuc.folds = 2
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([4, 8, 16, 32, 64, 128])
maxLocalAuc.lmbdas = numpy.linspace(0.5, 2.0, 7)
maxLocalAuc.loss = "hinge" 
maxLocalAuc.maxIterations = 100
maxLocalAuc.maxNorm = 100