示例#1
0
    def testOverfit(self):
        """
        Check that the hinge-loss training measure can be driven to (almost) zero.

        A small 10 x 20 sparse binary matrix is generated and a MaxLocalAUC
        model is fitted to it for up to 500 iterations with ``lmbda = 0`` and
        ``validationUsers = 0`` (presumably: no regularisation and no held-out
        users -- confirm against MaxLocalAUC). The test passes when entry
        ``[-1, 0]`` of the returned training measures equals zero to 3 decimal
        places.
        """
        m = 10
        n = 20
        matrixK = 5

        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), matrixK, w, csarray=True)

        eps = 0.001
        # Note: the model is given k = 10, not the k = 5 passed to the matrix generator.
        modelK = 10
        maxLocalAuc = MaxLocalAUC(modelK, u, eps=eps, stochastic=True)
        maxLocalAuc.rate = "constant"
        maxLocalAuc.maxIterations = 500
        maxLocalAuc.numProcesses = 1
        maxLocalAuc.loss = "hinge"
        maxLocalAuc.validationUsers = 0
        maxLocalAuc.lmbda = 0

        print("Overfit example")
        # learnModel(verbose=True) yields six values; only the training measures
        # are inspected. The remaining ones are discarded (the original bound one
        # of them to ``time``, shadowing the module name).
        _, _, trainMeasures, _, _, _ = maxLocalAuc.learnModel(X, verbose=True)

        # assertAlmostEqual replaces the deprecated alias assertAlmostEquals,
        # which was removed from unittest in Python 3.12.
        self.assertAlmostEqual(trainMeasures[-1, 0], 0, 3)
示例#2
0
    def profileLearnModel(self):
        """
        Profile ``maxLocalAuc.learnModel(X)`` on a synthetic dataset.

        The learner is built with ``stochastic=True``, ``parallelSGD`` enabled
        and ``numProcesses = 8``; its string representation is printed before
        the profiler is started.

        NOTE(review): the original comment described this as profiling "full
        gradient descent" even though ``stochastic=True`` is passed -- confirm
        which one is intended.
        """
        # Only the data matrix is used; the two other returned values are ignored.
        # Alternatives tried previously: syntheticDataset1(),
        # syntheticDataset1(u=0.2, sd=0.2) and DatasetUtils.flixster().
        X, _, _ = DatasetUtils.syntheticDataset1(u=0.01, m=1000, n=2000)

        u = 0.2
        constructorOptions = dict(alpha=0.5, eps=10**-6, stochastic=True)
        # The names ``maxLocalAuc`` and ``X`` must stay as they are: the profiled
        # statement below refers to them through locals().
        maxLocalAuc = MaxLocalAUC(self.k, 1 - u, **constructorOptions)

        # Applied in the same order as the original attribute assignments.
        settings = (
            ("maxNormU", 10),
            ("maxNormV", 10),
            ("maxIterations", 100),
            ("initialAlg", "rand"),
            ("rate", "constant"),
            ("parallelSGD", True),
            ("numProcesses", 8),
            ("numAucSamples", 10),
            ("numRowSamples", 30),
            ("scaleAlpha", False),
            ("loss", "hinge"),
            ("validationUsers", 0.0),
        )
        for attrName, attrValue in settings:
            setattr(maxLocalAuc, attrName, attrValue)
        print(maxLocalAuc)

        ProfileUtils.profile('maxLocalAuc.learnModel(X)', globals(), locals())
示例#3
0
    def testParallelLearnModel(self):
        """
        Smoke test for ``parallelLearnModel``: run 3 iterations with
        ``numProcesses = 4`` on the default synthetic dataset. Nothing is
        asserted; the test only fails if a call raises.
        """
        numpy.random.seed(21)
        shape = (500, 200)
        k = 5
        # NOTE(review): this matrix is overwritten by the synthetic dataset just
        # below. The call is retained as-is because it runs after the seed is
        # set and may consume random state -- confirm before removing.
        X = SparseUtils.generateSparseBinaryMatrix(shape, k, csarray=True)

        from wallhack.rankingexp.DatasetUtils import DatasetUtils
        X, U, V = DatasetUtils.syntheticDataset1()

        u = 0.1
        learner = MaxLocalAUC(k, 1 - u, alpha=1.0, eps=0.05, stochastic=True)
        learner.maxIterations = 3
        learner.recordStep = 1
        learner.rate = "optimal"
        learner.t0 = 2.0
        learner.validationUsers = 0.0
        learner.numProcesses = 4

        # taskset -p <mask> <pid> sets the CPU affinity mask of this process.
        affinityCommand = 'taskset -p 0xffffffff %d' % os.getpid()
        os.system(affinityCommand)

        ratio = X.nnz/learner.numAucSamples
        print(ratio)
        U, V = learner.parallelLearnModel(X)
示例#4
0
 def testOverfit(self):
     """
     Check that the hinge-loss training measure can be driven to (almost) zero.

     A small 10 x 20 sparse binary matrix is generated and a MaxLocalAUC
     model is fitted to it for up to 500 iterations with ``lmbda = 0`` and
     ``validationUsers = 0`` (presumably: no regularisation and no held-out
     users -- confirm against MaxLocalAUC). The test passes when entry
     ``[-1, 0]`` of the returned training measures equals zero to 3 decimal
     places.
     """
     m = 10
     n = 20
     matrixK = 5

     u = 0.5
     w = 1-u
     X = SparseUtils.generateSparseBinaryMatrix((m, n), matrixK, w, csarray=True)

     eps = 0.001
     # Note: the model is given k = 10, not the k = 5 passed to the matrix generator.
     modelK = 10
     maxLocalAuc = MaxLocalAUC(modelK, u, eps=eps, stochastic=True)
     maxLocalAuc.rate = "constant"
     maxLocalAuc.maxIterations = 500
     maxLocalAuc.numProcesses = 1
     maxLocalAuc.loss = "hinge"
     maxLocalAuc.validationUsers = 0
     maxLocalAuc.lmbda = 0

     print("Overfit example")
     # learnModel(verbose=True) yields six values; only the training measures
     # are inspected. The remaining ones are discarded (the original bound one
     # of them to ``time``, shadowing the module name).
     _, _, trainMeasures, _, _, _ = maxLocalAuc.learnModel(X, verbose=True)

     # assertAlmostEqual replaces the deprecated alias assertAlmostEquals,
     # which was removed from unittest in Python 3.12.
     self.assertAlmostEqual(trainMeasures[-1, 0], 0, 3)
示例#5
0
    def testLearningRateSelect(self):
        """
        Smoke test for ``learningRateSelect`` on a 10 x 20 sparse binary
        matrix. Nothing is asserted; the test only fails if a call raises.
        """
        numRows, numCols, k = 10, 20, 5
        u = 0.5
        X = SparseUtils.generateSparseBinaryMatrix(
            (numRows, numCols), k, 1 - u, csarray=True)

        learner = MaxLocalAUC(k, u, eps=0.001, stochastic=True)
        learner.rate = "optimal"
        learner.maxIterations = 5
        learner.numProcesses = 1

        learner.learningRateSelect(X)
示例#6
0
 def testLearningRateSelect(self):
     """
     Smoke test for ``learningRateSelect`` on a 10 x 20 sparse binary
     matrix. Nothing is asserted; the test only fails if a call raises.
     """
     shape = (10, 20)
     k = 5
     u = 0.5
     X = SparseUtils.generateSparseBinaryMatrix(shape, k, 1-u, csarray=True)

     learner = MaxLocalAUC(k, u, eps=0.001, stochastic=True)
     # Applied in the same order as the original attribute assignments.
     for attrName, attrValue in (("rate", "optimal"),
                                 ("maxIterations", 5),
                                 ("numProcesses", 1)):
         setattr(learner, attrName, attrValue)

     learner.learningRateSelect(X)
示例#7
0
    def testModelSelectMaxNorm(self):
        """
        Smoke test for ``modelSelectNorm`` on a 10 x 20 sparse binary matrix
        with ``metric = "f1"``. Nothing is asserted; the test only fails if a
        call raises.
        """
        shape = (10, 20)
        k = 5
        w = 1 - 0.5
        X = SparseUtils.generateSparseBinaryMatrix(shape, k, w, csarray=True)

        # taskset -p <mask> <pid> sets the CPU affinity mask of this process.
        affinityCommand = 'taskset -p 0xffffffff %d' % os.getpid()
        os.system(affinityCommand)

        # The same k is used for the data and the model (the original re-assigned
        # k = 5 at this point, which had no effect).
        learner = MaxLocalAUC(k, w, eps=0.001, stochastic=True)
        learner.maxIterations = 5
        learner.recordStep = 1
        learner.validationSize = 3
        learner.metric = "f1"

        learner.modelSelectNorm(X)
示例#8
0
 def testModelSelectMaxNorm(self):
     """
     Smoke test for ``modelSelectNorm`` on a 10 x 20 sparse binary matrix
     with ``metric = "f1"``. Nothing is asserted; the test only fails if a
     call raises.
     """
     numRows, numCols, k = 10, 20, 5
     u = 0.5
     w = 1-u
     X = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), k, w, csarray=True)

     # taskset -p <mask> <pid> sets the CPU affinity mask of this process.
     pid = os.getpid()
     os.system('taskset -p 0xffffffff %d' % pid)

     # k is still 5 here; the original's second "k = 5" assignment was a no-op.
     learner = MaxLocalAUC(k, w, eps=0.001, stochastic=True)
     # Applied in the same order as the original attribute assignments.
     for attrName, attrValue in (("maxIterations", 5),
                                 ("recordStep", 1),
                                 ("validationSize", 3),
                                 ("metric", "f1")):
         setattr(learner, attrName, attrValue)

     learner.modelSelectNorm(X)
示例#9
0
    def profileLearnModel2(self):
        """
        Profile a single ``learnModel`` call (stochastic case) on the training
        part of the first row-wise split of a synthetic dataset.
        """
        # Only the data matrix is used; the two other returned values are ignored.
        # Alternative considered previously: DatasetUtils.flixster() followed by
        # Sampling.sampleUsers(X, 1000).
        X, _, _ = DatasetUtils.syntheticDataset1(u=0.001, m=10000, n=1000)

        u = 0.2
        learner = MaxLocalAUC(self.k, 1 - u, alpha=0.5, eps=10**-6, stochastic=True)

        # Applied in the same order as the original attribute assignments.
        # (parallelSGD was left disabled in the original as well.)
        settings = (
            ("numRowSamples", 2),
            ("numAucSamples", 10),
            ("maxIterations", 1),
            ("numRecordAucSamples", 100),
            ("recordStep", 10),
            ("initialAlg", "rand"),
            ("rate", "optimal"),
        )
        for attrName, attrValue in settings:
            setattr(learner, attrName, attrValue)

        # Only the first (train, test) pair is used, and only its training part.
        splits = Sampling.shuffleSplitRows(X, learner.folds, 5)
        trainX, testX = splits[0]

        def run():
            # The six returned values are unpacked but not used. The original
            # kept commented-out logging of MCEvaluator.precisionAtK for
            # train/test at 5, 10, 20 and 50 here.
            _U, _V, _trainMeasures, _testMeasures, _iterations, _elapsed = \
                learner.learnModel(trainX, True)

        # ``run`` must keep its name: the profiled statement refers to it
        # through locals().
        ProfileUtils.profile('run()', globals(), locals())
示例#10
0
    def testParallelLearnModel(self):
        """
        Smoke test for ``parallelLearnModel``: run 3 iterations with
        ``numProcesses = 4`` on the default synthetic dataset. Nothing is
        asserted; the test only fails if a call raises.
        """
        numpy.random.seed(21)
        numRows, numCols, k = 500, 200, 5
        # NOTE(review): this matrix is overwritten by the synthetic dataset just
        # below. The call is retained as-is because it runs after the seed is
        # set and may consume random state -- confirm before removing.
        X = SparseUtils.generateSparseBinaryMatrix(
            (numRows, numCols), k, csarray=True)

        from wallhack.rankingexp.DatasetUtils import DatasetUtils
        X, U, V = DatasetUtils.syntheticDataset1()

        w = 1 - 0.1
        learner = MaxLocalAUC(k, w, alpha=1.0, eps=0.05, stochastic=True)
        # Applied in the same order as the original attribute assignments.
        for attrName, attrValue in (("maxIterations", 3),
                                    ("recordStep", 1),
                                    ("rate", "optimal"),
                                    ("t0", 2.0),
                                    ("validationUsers", 0.0),
                                    ("numProcesses", 4)):
            setattr(learner, attrName, attrValue)

        # taskset -p <mask> <pid> sets the CPU affinity mask of this process.
        pid = os.getpid()
        os.system('taskset -p 0xffffffff %d' % pid)
        print(X.nnz / learner.numAucSamples)
        U, V = learner.parallelLearnModel(X)
示例#11
0
# Build a MaxLocalAUC learner and set its hyper-parameters and search grids.
# NOTE(review): the meaning of the individual attributes is defined by
# MaxLocalAUC, which is not visible here; the comments below only spell out
# the values being assigned.
k2 = 8
u2 = 0.5
w2 = 1-u2  # complement of u2; passed as the second constructor argument
eps = 10**-4
lmbda = 0.0  # the same value is used for both lmbdaU and lmbdaV
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps, lmbdaU=lmbda, lmbdaV=lmbda, stochastic=True)
maxLocalAuc.alpha = 0.05
maxLocalAuc.alphas = 2.0**-numpy.arange(0, 5, 1)  # 1, 1/2, 1/4, 1/8, 1/16
maxLocalAuc.folds = 1
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([k2])  # single-element grid containing only k2
maxLocalAuc.lmbdas = numpy.linspace(0.5, 2.0, 7)  # 0.5, 0.75, ..., 2.0 (7 values)
maxLocalAuc.maxIterations = 500
maxLocalAuc.metric = "f1"
maxLocalAuc.normalise = True
maxLocalAuc.numAucSamples = 10
maxLocalAuc.numProcesses = 1
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.numRowSamples = 30
maxLocalAuc.rate = "constant"
maxLocalAuc.recordStep = 10
maxLocalAuc.rho = 1.0
maxLocalAuc.t0 = 1.0
maxLocalAuc.t0s = 2.0**-numpy.arange(7, 12, 1)  # 2**-7, 2**-8, ..., 2**-11
maxLocalAuc.validationSize = 3
maxLocalAuc.validationUsers = 0

# Run "taskset -p 0xffffffff <pid>" to set the CPU affinity mask of this
# process (mask 0xffffffff = CPUs 0-31). Presumably this undoes affinity
# pinning done by an imported library -- confirm.
os.system('taskset -p 0xffffffff %d' % os.getpid())
示例#12
0
# Log the number of non-zero entries of the train and test matrices
# (trainX and testX are defined outside this fragment).
logging.debug("Number of non-zero elements: " + str((trainX.nnz, testX.nnz)))

# Build a MaxLocalAUC learner and set its hyper-parameters and search grids.
# NOTE(review): the meaning of the individual attributes is defined by
# MaxLocalAUC, which is not visible here; the comments below only spell out
# the values being assigned.
k2 = 32
u2 = 0.1
w2 = 1-u2  # complement of u2; passed as the second constructor argument
eps = 10**-8
lmbda = 1.0  # used for lmbdaV only; lmbdaU is fixed to 0.0 below
maxLocalAuc = MaxLocalAUC(k2, w2, eps=eps, lmbdaU=0.0, lmbdaV=lmbda, stochastic=True)
maxLocalAuc.alpha = 1.0
maxLocalAuc.alphas = 2.0**-numpy.arange(-5, 5, 1)  # 2**5, 2**4, ..., 2**-4
maxLocalAuc.folds = 5
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 1.0
maxLocalAuc.itemExpQ = 1.0
maxLocalAuc.lmbdas = numpy.linspace(0.5, 2.0, 7)  # 0.5, 0.75, ..., 2.0 (7 values)
maxLocalAuc.maxIterations = 100
maxLocalAuc.metric = "f1"
maxLocalAuc.normalise = True
maxLocalAuc.numAucSamples = 10
# numProcesses is deliberately left at whatever MaxLocalAUC sets by default.
#maxLocalAuc.numProcesses = 1
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.numRowSamples = 30
maxLocalAuc.rate = "optimal"
maxLocalAuc.recommendSize = 5
maxLocalAuc.recordStep = 1
maxLocalAuc.rho = 1.0
maxLocalAuc.t0 = 1.0
maxLocalAuc.t0s = 2.0**-numpy.arange(-1, 6, 1)  # 2**1, 2**0, ..., 2**-5
maxLocalAuc.validationSize = 5
maxLocalAuc.validationUsers = 0