def testDerivativeViApprox(self):
    """
    Test that derivativeViApprox converges to the exact derivative.

    We approximate using a large number of samples for the AUC and check
    that the averaged approximate derivative is close to the exact
    derivative (derivativeVi) and, in the first section, to a central
    finite-difference estimate of the objective gradient.
    """
    m = 20
    n = 30
    k = 3
    X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

    # Make the first two columns constant so X has a known structure.
    for i in range(m):
        X[i, 0] = 1
        X[i, 1] = 0

    w = 0.1
    learner = MaxAUCSigmoid(k, w)
    learner.normalise = False
    learner.lmbdaU = 0
    learner.lmbdaV = 0
    learner.numAucSamples = n

    indPtr, colInds = SparseUtils.getOmegaListPtr(X)

    # Random item weight vectors, normalised to sum to 1.
    gp = numpy.random.rand(n)
    gp /= gp.sum()
    gq = numpy.random.rand(n)
    gq /= gq.sum()

    permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
    permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)

    # Only used to compute the weight norms shared by both derivatives.
    maxLocalAuc = MaxLocalAUC(k, w)
    normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

    numRuns = 200
    numTests = 5

    # Section 1: no regularisation. Compare the averaged approximate
    # derivative against the exact derivative and a finite-difference
    # estimate of the objective gradient.
    for i in numpy.random.permutation(m)[0:numTests]:
        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        dv1 = numpy.zeros(k)
        for j in range(numRuns):
            dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
        dv1 /= numRuns
        dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

        # Central finite differences on the objective, one component at a time.
        dv3 = numpy.zeros(k)
        for j in range(k):
            eps = 10**-6
            tempV = V.copy()
            tempV[i, j] += eps
            obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
            tempV = V.copy()
            tempV[i, j] -= eps
            obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
            dv3[j] = (obj1 - obj2) / (2 * eps)

        print(dv1, dv2, dv3)
        nptst.assert_array_almost_equal(dv1, dv2, 3)

    # Section 2: repeat with V regularisation enabled.
    learner.lmbdaV = 0.5
    learner.rho = 0.5
    for i in numpy.random.permutation(m)[0:numTests]:
        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        dv1 = numpy.zeros(k)
        for j in range(numRuns):
            dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
        dv1 /= numRuns
        dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

        print(dv1, dv2)
        nptst.assert_array_almost_equal(dv1, dv2, 3)

    # Section 3: subsample rows; more runs to reduce sampling variance.
    learner.numRowSamples = 10
    numRuns = 1000
    for i in numpy.random.permutation(m)[0:numTests]:
        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        dv1 = numpy.zeros(k)
        for j in range(numRuns):
            dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
        dv1 /= numRuns
        dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

        print(dv1, dv2)
        nptst.assert_array_almost_equal(dv1, dv2, 3)

    # Section 4: final test over every row, with regularisation disabled
    # again and a reduced number of AUC samples.
    # BUG FIX: these parameters were previously assigned to maxLocalAuc,
    # which is not the object under test, so they had no effect on the
    # assertions below.  All earlier sections configure `learner`.
    learner.numRowSamples = m
    learner.numAucSamples = 20
    learner.lmbdaV = 0
    numRuns = 1000
    print("Final test")
    for i in range(m):
        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        dv1 = numpy.zeros(k)
        for j in range(numRuns):
            dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
        dv1 /= numRuns
        dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

        print(i, dv1, dv2)
        nptst.assert_array_almost_equal(dv1, dv2, 3)
# Report the sparsity of the train/test split (trainX/testX are created
# earlier in this script, outside this excerpt).
logging.debug("Number of non-zero elements: " + str((trainX.nnz, testX.nnz)))

# w = 1 - u: quantile threshold used by the local AUC objective.
u = 0.1
w = 1-u
k2 = 64
eps = 10**-6

# Configure the stochastic MaxLocalAUC learner and the hyperparameter grids
# (alphas, lmbdas, maxNorms, t0s) used during model selection.
maxLocalAuc = MaxLocalAUC(k2, w, eps=eps, stochastic=True)
maxLocalAuc.alpha = 0.1
maxLocalAuc.alphas = 2.0**-numpy.arange(0, 5, 1)
maxLocalAuc.folds = 1
maxLocalAuc.initialAlg = "rand"
# Uniform item sampling (no popularity-based exponents).
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.ks = numpy.array([k2])
maxLocalAuc.lmbdaU = 0.0
maxLocalAuc.lmbdaV = 0.0
maxLocalAuc.lmbdas = 2.0**-numpy.arange(0, 8)
maxLocalAuc.loss = "hinge"
maxLocalAuc.maxIterations = 500
maxLocalAuc.maxNorms = 2.0**numpy.arange(-2, 5, 0.5)
maxLocalAuc.metric = "f1"
maxLocalAuc.normalise = True
maxLocalAuc.numAucSamples = 10
maxLocalAuc.numProcesses = multiprocessing.cpu_count()
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.numRowSamples = 30
maxLocalAuc.rate = "constant"
maxLocalAuc.recordStep = 10
maxLocalAuc.rho = 1.0
maxLocalAuc.t0 = 1.0
maxLocalAuc.t0s = 2.0**-numpy.arange(7, 12, 1)
def testDerivativeViApprox(self):
    """
    Test that derivativeViApprox converges to the exact derivative.

    We approximate using a large number of samples for the AUC and check
    that the averaged approximate derivative is close to the exact
    derivative (derivativeVi) and, in the first section, to a central
    finite-difference estimate of the objective gradient.
    """
    m = 20
    n = 30
    k = 3
    X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

    # Make the first two columns constant so X has a known structure.
    for i in range(m):
        X[i, 0] = 1
        X[i, 1] = 0

    w = 0.1
    eps = 0.001  # NOTE(review): unused here; re-assigned to 10**-6 below.
    learner = MaxAUCSigmoid(k, w)
    learner.normalise = False
    learner.lmbdaU = 0
    learner.lmbdaV = 0
    learner.numAucSamples = n

    indPtr, colInds = SparseUtils.getOmegaListPtr(X)

    # NOTE(review): these U/V are overwritten inside every loop below.
    U = numpy.random.rand(X.shape[0], k)
    V = numpy.random.rand(X.shape[1], k)

    # Random item weight vectors, normalised to sum to 1.
    gp = numpy.random.rand(n)
    gp /= gp.sum()
    gq = numpy.random.rand(n)
    gq /= gq.sum()

    permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
    permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)

    # Only used to compute the weight norms shared by both derivatives.
    maxLocalAuc = MaxLocalAUC(k, w)
    normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)

    numRuns = 200
    numTests = 5

    #Let's compare against using the exact derivative
    for i in numpy.random.permutation(m)[0:numTests]:
        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        dv1 = numpy.zeros(k)
        for j in range(numRuns):
            dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
        dv1 /= numRuns
        dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

        # Central finite differences on the objective, one component at a time.
        dv3 = numpy.zeros(k)
        for j in range(k):
            eps = 10**-6
            tempV = V.copy()
            tempV[i, j] += eps
            obj1 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
            tempV = V.copy()
            tempV[i, j] -= eps
            obj2 = learner.objective(indPtr, colInds, indPtr, colInds, U, tempV, gp, gq)
            dv3[j] = (obj1 - obj2) / (2 * eps)

        print(dv1, dv2, dv3)
        nptst.assert_array_almost_equal(dv1, dv2, 3)

    # Repeat with V regularisation enabled.
    learner.lmbdaV = 0.5
    learner.rho = 0.5
    for i in numpy.random.permutation(m)[0:numTests]:
        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        dv1 = numpy.zeros(k)
        for j in range(numRuns):
            dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
        dv1 /= numRuns
        dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

        print(dv1, dv2)
        nptst.assert_array_almost_equal(dv1, dv2, 3)

    # Subsample rows; more runs to reduce sampling variance.
    learner.numRowSamples = 10
    numRuns = 1000
    for i in numpy.random.permutation(m)[0:numTests]:
        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        dv1 = numpy.zeros(k)
        for j in range(numRuns):
            dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
        dv1 /= numRuns
        dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

        print(dv1, dv2)
        nptst.assert_array_almost_equal(dv1, dv2, 3)

    # NOTE(review): the three assignments below target maxLocalAuc, but the
    # object under test is `learner` (configured in all earlier sections),
    # so they appear to have no effect — confirm whether `learner` was meant.
    maxLocalAuc.numRowSamples = m
    maxLocalAuc.numAucSamples = 20
    maxLocalAuc.lmbdaV = 0
    numRuns = 1000
    print("Final test")
    #for i in numpy.random.permutation(m)[0:numTests]:
    for i in range(m):
        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)

        dv1 = numpy.zeros(k)
        for j in range(numRuns):
            dv1 += learner.derivativeViApprox(indPtr, colInds, U, V, gp, gq, normGp, normGq, permutedRowInds, permutedColInds, i)
        dv1 /= numRuns
        #dv1 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)
        dv2 = learner.derivativeVi(indPtr, colInds, U, V, gp, gq, i)

        print(i, dv1, dv2)
        nptst.assert_array_almost_equal(dv1, dv2, 3)
# w2 = 1 - u: quantile threshold used by the local AUC objective.
u = 0.1
w2 = 1-u
k = 64
eps = 10**-8

# Configure a stochastic MaxLocalAUC learner for the learning-rate study.
maxLocalAuc = MaxLocalAUC(k, w2, eps=eps, stochastic=True)
maxLocalAuc.maxIterations = 50
maxLocalAuc.numRowSamples = 30
maxLocalAuc.numAucSamples = 10
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.recordStep = 10
maxLocalAuc.rate = "optimal"
maxLocalAuc.alpha = 1.0
maxLocalAuc.t0 = 0.1
maxLocalAuc.lmbdaU = 0.0
maxLocalAuc.lmbdaV = 1.0
maxLocalAuc.rho = 0.5

maxItems = 10
chunkSize = 1  # presumably one task per worker call — confirm against caller

# Grid of averaging start points and (alpha, t0) learning-rate pairs to sweep.
startAverages = numpy.array([2, 5, 10, 20, 30, 40])
learningRateParams = [(4.0, 1.0), (4.0, 0.5), (4.0, 0.1), (1.0, 1.0), (1.0, 0.5), (1.0, 0.1), (0.25, 1.0), (0.25, 0.5), (0.25, 0.1)]
print(startAverages)

def computeTestObj(args):
    """
    Train a MaxLocalAUC model on trainX and return the learned factors plus
    the last recorded train/test measures.

    :param args: tuple (trainX, maxLocalAuc) — the training matrix and a
        configured learner.
    :return: tuple (U, V, last train measure, last test measure).
    """
    trainX, maxLocalAuc = args
    # Fixed seed so every worker run is reproducible.
    numpy.random.seed(21)
    U, V, trainMeasures, testMeasures, iterations, totalTime = maxLocalAuc.learnModel(trainX, verbose=True)
    return U, V, trainMeasures[-1, 0], testMeasures[-1, 0]
# Remaining learner configuration (maxLocalAuc is created earlier in this
# script, outside this excerpt).
maxLocalAuc.recordStep = 10
maxLocalAuc.reg = False
maxLocalAuc.rho = 1.0
maxLocalAuc.startAverage = 100
maxLocalAuc.t0 = 1.0
maxLocalAuc.t0s = 2.0**-numpy.arange(1, 12, 2)
maxLocalAuc.validationSize = 5
maxLocalAuc.validationUsers = 0.0

if saveResults:
    # Run learning-rate selection on a 2x2 grid of (lmbdaU, lmbdaV)
    # settings and save the mean objectives for later comparison.
    X = DatasetUtils.getDataset(dataset, nnz=100000)
    print(X.shape, X.nnz)
    print(maxLocalAuc)

    maxLocalAuc.lmbdaU = 0.25
    maxLocalAuc.lmbdaV = 0.25
    meanObjs1, paramDict = maxLocalAuc.learningRateSelect(X)

    maxLocalAuc.lmbdaU = 0.03125
    maxLocalAuc.lmbdaV = 0.25
    meanObjs2, paramDict = maxLocalAuc.learningRateSelect(X)

    maxLocalAuc.lmbdaU = 0.25
    maxLocalAuc.lmbdaV = 0.03125
    meanObjs3, paramDict = maxLocalAuc.learningRateSelect(X)

    maxLocalAuc.lmbdaU = 0.03125
    maxLocalAuc.lmbdaV = 0.03125
    meanObjs4, paramDict = maxLocalAuc.learningRateSelect(X)

    # Arrays are saved positionally (arr_0 ... arr_3) in the npz archive.
    numpy.savez(outputFile, meanObjs1, meanObjs2, meanObjs3, meanObjs4)