def testOverfit(self):
    """
    See if we can get a zero objective on the hinge loss.

    A small 10 x 20 sparse binary matrix is generated and a hinge-loss
    model is trained on it with lmbda = 0, validationUsers = 0, a constant
    rate and up to 500 iterations. The test asserts that entry [-1, 0] of
    the returned training measures is zero to 3 decimal places.
    """
    m = 10
    n = 20
    u = 0.5
    w = 1 - u

    # The matrix is generated with k = 5 while the model is built with
    # k = 10; distinct names make the difference explicit.
    dataK = 5
    X = SparseUtils.generateSparseBinaryMatrix((m, n), dataK, w, csarray=True)

    eps = 0.001
    modelK = 10
    # Note: u (not w) is passed here; both are 0.5 for this test.
    maxLocalAuc = MaxLocalAUC(modelK, u, eps=eps, stochastic=True)
    maxLocalAuc.rate = "constant"
    maxLocalAuc.maxIterations = 500
    maxLocalAuc.numProcesses = 1
    maxLocalAuc.loss = "hinge"
    maxLocalAuc.validationUsers = 0
    maxLocalAuc.lmbda = 0

    print("Overfit example")
    # With verbose=True six values are unpacked; only the training
    # measures are inspected, so the rest are discarded.
    _, _, trainMeasures, _, _, _ = maxLocalAuc.learnModel(X, verbose=True)

    # assertAlmostEquals is a deprecated alias that was removed in
    # Python 3.12; assertAlmostEqual is the supported spelling.
    self.assertAlmostEqual(trainMeasures[-1, 0], 0, 3)
def profileLearnModel(self):
    """
    Profile one call of MaxLocalAUC.learnModel on a synthetic dataset.

    The model is configured with a hinge loss, a constant rate, parallelSGD
    enabled and 8 processes, printed, and then handed to
    ProfileUtils.profile.
    """
    # NOTE(review): the original comment read "Profile full gradient
    # descent", yet the model is constructed with stochastic=True - confirm.
    X, U, V = DatasetUtils.syntheticDataset1(u=0.01, m=1000, n=2000)
    # Alternative data sources that were tried here:
    #X, U, V = DatasetUtils.syntheticDataset1()
    #X, U, V = DatasetUtils.syntheticDataset1(u=0.2, sd=0.2)
    #X = DatasetUtils.flixster()

    alpha = 0.5
    eps = 10**-6
    u = 0.2
    w = 1 - u

    maxLocalAuc = MaxLocalAUC(
        self.k,
        w,
        alpha=alpha,
        eps=eps,
        stochastic=True,
    )
    maxLocalAuc.maxNormU = 10
    maxLocalAuc.maxNormV = 10
    maxLocalAuc.maxIterations = 100
    maxLocalAuc.initialAlg = "rand"
    maxLocalAuc.rate = "constant"
    maxLocalAuc.parallelSGD = True
    maxLocalAuc.numProcesses = 8
    maxLocalAuc.numAucSamples = 10
    maxLocalAuc.numRowSamples = 30
    maxLocalAuc.scaleAlpha = False
    maxLocalAuc.loss = "hinge"
    maxLocalAuc.validationUsers = 0.0
    print(maxLocalAuc)

    # The profiled statement is a string evaluated against these
    # namespaces, so the local names maxLocalAuc and X must not be renamed.
    statement = 'maxLocalAuc.learnModel(X)'
    ProfileUtils.profile(statement, globals(), locals())
def testParallelLearnModel(self):
    """
    Smoke test for MaxLocalAUC.parallelLearnModel with 4 processes.

    No assertions are made beyond the call returning a (U, V) pair.
    """
    numpy.random.seed(21)

    numRows = 500
    numCols = 200
    k = 5
    # This matrix is replaced a few lines below. It is kept because it is
    # generated after the seed is set - presumably it draws from numpy's
    # global random state, so removing it could change the data (TODO confirm).
    X = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), k, csarray=True)

    from wallhack.rankingexp.DatasetUtils import DatasetUtils
    X, U, V = DatasetUtils.syntheticDataset1()

    u = 0.1
    maxLocalAuc = MaxLocalAUC(k, 1 - u, alpha=1.0, eps=0.05, stochastic=True)
    maxLocalAuc.maxIterations = 3
    maxLocalAuc.recordStep = 1
    maxLocalAuc.rate = "optimal"
    maxLocalAuc.t0 = 2.0
    maxLocalAuc.validationUsers = 0.0
    maxLocalAuc.numProcesses = 4

    # Run taskset on the current process with the mask 0xffffffff.
    pid = os.getpid()
    os.system('taskset -p 0xffffffff %d' % pid)

    print(X.nnz/maxLocalAuc.numAucSamples)
    U, V = maxLocalAuc.parallelLearnModel(X)
def testOverfit(self):
    """
    See if we can get a zero objective on the hinge loss.

    Trains a hinge-loss model with lmbda = 0 and validationUsers = 0 on a
    10 x 20 sparse binary matrix for up to 500 iterations at a constant
    rate, then checks that trainMeasures[-1, 0] equals zero to 3 decimal
    places.
    """
    shape = (10, 20)
    u = 0.5
    w = 1 - u

    # k = 5 is used to generate the data, k = 10 to build the model.
    generationK = 5
    X = SparseUtils.generateSparseBinaryMatrix(shape, generationK, w, csarray=True)

    eps = 0.001
    learnerK = 10
    # u rather than w is handed to the constructor (both equal 0.5 here).
    maxLocalAuc = MaxLocalAUC(learnerK, u, eps=eps, stochastic=True)
    maxLocalAuc.rate = "constant"
    maxLocalAuc.maxIterations = 500
    maxLocalAuc.numProcesses = 1
    maxLocalAuc.loss = "hinge"
    maxLocalAuc.validationUsers = 0
    maxLocalAuc.lmbda = 0

    print("Overfit example")
    # Six values are unpacked from learnModel; only the training measures
    # are needed, and discarding the rest avoids shadowing the name "time".
    _, _, trainMeasures, _, _, _ = maxLocalAuc.learnModel(X, verbose=True)

    # Use assertAlmostEqual: the assertAlmostEquals alias is deprecated and
    # no longer exists from Python 3.12 onwards.
    self.assertAlmostEqual(trainMeasures[-1, 0], 0, 3)
def testLearningRateSelect(self):
    """
    Smoke test for MaxLocalAUC.learningRateSelect on a 10 x 20 matrix.

    No assertions are made; the test only checks that the call completes.
    """
    shape = (10, 20)
    k = 5
    u = 0.5

    # The matrix is generated with w = 1 - u ...
    X = SparseUtils.generateSparseBinaryMatrix(shape, k, 1 - u, csarray=True)

    # ... while the model receives u itself (both are 0.5 here).
    maxLocalAuc = MaxLocalAUC(k, u, eps=0.001, stochastic=True)
    maxLocalAuc.rate = "optimal"
    maxLocalAuc.maxIterations = 5
    maxLocalAuc.numProcesses = 1

    maxLocalAuc.learningRateSelect(X)
def testLearningRateSelect(self):
    """
    Run learningRateSelect on a small random sparse binary matrix.

    The method has no assertions, so it passes whenever
    learningRateSelect returns without raising.
    """
    numRows, numCols = 10, 20
    k = 5
    u = 0.5
    w = 1 - u
    X = SparseUtils.generateSparseBinaryMatrix(
        (numRows, numCols), k, w, csarray=True
    )

    eps = 0.001
    # u, not w, is the second constructor argument in this test.
    maxLocalAuc = MaxLocalAUC(k, u, eps=eps, stochastic=True)
    maxLocalAuc.rate = "optimal"
    maxLocalAuc.maxIterations = 5
    maxLocalAuc.numProcesses = 1
    maxLocalAuc.learningRateSelect(X)
def testModelSelectMaxNorm(self):
    """
    Smoke test for MaxLocalAUC.modelSelectNorm using the "f1" metric.

    No assertions are made; the test only checks that the call completes.
    """
    shape = (10, 20)
    k = 5  # used for both the generated matrix and the model
    u = 0.5
    w = 1 - u
    X = SparseUtils.generateSparseBinaryMatrix(shape, k, w, csarray=True)

    # Run taskset on the current process with the mask 0xffffffff.
    pid = os.getpid()
    os.system('taskset -p 0xffffffff %d' % pid)

    maxLocalAuc = MaxLocalAUC(k, w, eps=0.001, stochastic=True)
    maxLocalAuc.maxIterations = 5
    maxLocalAuc.recordStep = 1
    maxLocalAuc.validationSize = 3
    maxLocalAuc.metric = "f1"

    maxLocalAuc.modelSelectNorm(X)
def testModelSelectMaxNorm(self):
    """
    Run modelSelectNorm on a small random sparse binary matrix.

    The model is limited to 5 iterations with validationSize = 3 and the
    "f1" metric. There are no assertions, so the test passes whenever
    modelSelectNorm returns without raising.
    """
    numRows, numCols = 10, 20
    k = 5
    u = 0.5
    w = 1 - u
    X = SparseUtils.generateSparseBinaryMatrix(
        (numRows, numCols), k, w, csarray=True
    )

    # Invoke taskset for this process id before building the model.
    tasksetCommand = 'taskset -p 0xffffffff %d' % os.getpid()
    os.system(tasksetCommand)

    eps = 0.001
    maxLocalAuc = MaxLocalAUC(k, w, eps=eps, stochastic=True)
    maxLocalAuc.maxIterations = 5
    maxLocalAuc.recordStep = 1
    maxLocalAuc.validationSize = 3
    maxLocalAuc.metric = "f1"
    maxLocalAuc.modelSelectNorm(X)
def profileLearnModel2(self):
    """
    Profile the stochastic case.

    Builds a synthetic dataset, takes the first train/test pair returned by
    Sampling.shuffleSplitRows and profiles a single learnModel call on the
    training part through ProfileUtils.profile.
    """
    # Alternative data source that was used here:
    #X = DatasetUtils.flixster()
    #X = Sampling.sampleUsers(X, 1000)
    X, U, V = DatasetUtils.syntheticDataset1(u=0.001, m=10000, n=1000)

    rho = 0.00  # not referenced again in this method
    u = 0.2
    w = 1 - u
    eps = 10**-6
    alpha = 0.5
    k = self.k

    maxLocalAuc = MaxLocalAUC(
        k,
        w,
        alpha=alpha,
        eps=eps,
        stochastic=True,
    )
    maxLocalAuc.numRowSamples = 2
    maxLocalAuc.numAucSamples = 10
    maxLocalAuc.maxIterations = 1
    maxLocalAuc.numRecordAucSamples = 100
    maxLocalAuc.recordStep = 10
    maxLocalAuc.initialAlg = "rand"
    maxLocalAuc.rate = "optimal"
    #maxLocalAuc.parallelSGD = True

    # Only the first (train, test) pair is used; testX is unpacked but not
    # read again.
    trainTestX = Sampling.shuffleSplitRows(X, maxLocalAuc.folds, 5)
    firstSplit = trainTestX[0]
    trainX, testX = firstSplit

    def run():
        # Six values are unpacked from learnModel; none are used afterwards.
        U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(trainX, True)
        # Disabled diagnostics: train and test Precision@5/10/20/50 used to
        # be logged here, e.g.
        #logging.debug("Train Precision@5=" + str(MCEvaluator.precisionAtK(trainX, U, V, 5)))
        #logging.debug("Test Precision@5=" + str(MCEvaluator.precisionAtK(testX, U, V, 5)))

    # 'run()' is looked up by name in the supplied namespaces, so the nested
    # function must keep the name "run".
    ProfileUtils.profile('run()', globals(), locals())
def testParallelLearnModel(self):
    """
    Run parallelLearnModel with 4 processes on a synthetic dataset.

    There are no assertions; the test passes when parallelLearnModel
    returns a pair that can be unpacked into U and V.
    """
    numpy.random.seed(21)

    shape = (500, 200)
    k = 5
    # X is overwritten by the synthetic dataset below. The call is kept in
    # place since it follows the seeding above - presumably it consumes
    # numpy's global random state (TODO confirm).
    X = SparseUtils.generateSparseBinaryMatrix(shape, k, csarray=True)

    from wallhack.rankingexp.DatasetUtils import DatasetUtils
    X, U, V = DatasetUtils.syntheticDataset1()

    u = 0.1
    w = 1 - u
    eps = 0.05
    maxLocalAuc = MaxLocalAUC(
        k,
        w,
        alpha=1.0,
        eps=eps,
        stochastic=True,
    )
    maxLocalAuc.maxIterations = 3
    maxLocalAuc.recordStep = 1
    maxLocalAuc.rate = "optimal"
    maxLocalAuc.t0 = 2.0
    maxLocalAuc.validationUsers = 0.0
    maxLocalAuc.numProcesses = 4

    # Invoke taskset for this process id with the mask 0xffffffff.
    tasksetCommand = 'taskset -p 0xffffffff %d' % os.getpid()
    os.system(tasksetCommand)

    ratio = X.nnz / maxLocalAuc.numAucSamples
    print(ratio)
    U, V = maxLocalAuc.parallelLearnModel(X)
# Configure a MaxLocalAUC learner (k = 8, constant rate, single process).
# NOTE(review): the enclosing definition is not visible in this excerpt.
k2 = 8
u2 = 0.5
w2 = 1 - u2
eps = 10**-4
lmbda = 0.0  # the same value is used for both lmbdaU and lmbdaV

maxLocalAuc = MaxLocalAUC(
    k2,
    w2,
    eps=eps,
    lmbdaU=lmbda,
    lmbdaV=lmbda,
    stochastic=True,
)
maxLocalAuc.alpha = 0.05
maxLocalAuc.alphas = 2.0**-numpy.arange(0, 5, 1)
maxLocalAuc.folds = 1
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([k2])
maxLocalAuc.lmbdas = numpy.linspace(0.5, 2.0, 7)
maxLocalAuc.maxIterations = 500
maxLocalAuc.metric = "f1"
maxLocalAuc.normalise = True
maxLocalAuc.numAucSamples = 10
maxLocalAuc.numProcesses = 1
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.numRowSamples = 30
maxLocalAuc.rate = "constant"
maxLocalAuc.recordStep = 10
maxLocalAuc.rho = 1.0
maxLocalAuc.t0 = 1.0
maxLocalAuc.t0s = 2.0**-numpy.arange(7, 12, 1)
maxLocalAuc.validationSize = 3
maxLocalAuc.validationUsers = 0

# Run taskset on the current process with the mask 0xffffffff.
currentPid = os.getpid()
os.system('taskset -p 0xffffffff %d' % currentPid)
# Log the non-zero counts of the train/test matrices, then configure a
# MaxLocalAUC learner (k = 32, "optimal" rate, 5 folds).
# NOTE(review): the enclosing definition (and the origin of trainX/testX)
# is not visible in this excerpt.
nnzCounts = (trainX.nnz, testX.nnz)
logging.debug("Number of non-zero elements: " + str(nnzCounts))

k2 = 32
u2 = 0.1
w2 = 1 - u2
eps = 10**-8
lmbda = 1.0  # applied to lmbdaV only; lmbdaU is fixed at 0.0

maxLocalAuc = MaxLocalAUC(
    k2,
    w2,
    eps=eps,
    lmbdaU=0.0,
    lmbdaV=lmbda,
    stochastic=True,
)
maxLocalAuc.alpha = 1.0
maxLocalAuc.alphas = 2.0**-numpy.arange(-5, 5, 1)
maxLocalAuc.folds = 5
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 1.0
maxLocalAuc.itemExpQ = 1.0
maxLocalAuc.lmbdas = numpy.linspace(0.5, 2.0, 7)
maxLocalAuc.maxIterations = 100
maxLocalAuc.metric = "f1"
maxLocalAuc.normalise = True
maxLocalAuc.numAucSamples = 10
#maxLocalAuc.numProcesses = 1
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.numRowSamples = 30
maxLocalAuc.rate = "optimal"
maxLocalAuc.recommendSize = 5
maxLocalAuc.recordStep = 1
maxLocalAuc.rho = 1.0
maxLocalAuc.t0 = 1.0
maxLocalAuc.t0s = 2.0**-numpy.arange(-1, 6, 1)
maxLocalAuc.validationSize = 5
maxLocalAuc.validationUsers = 0