def profileLearnModel(self):
    """
    Profile MaxLocalAUC.learnModel on a synthetic dataset.

    The model is built with stochastic=True and has parallelSGD switched on
    with 8 processes, so this measures the stochastic, parallel code path.
    The configured model is printed before profiling starts.
    """
    # Input data. Other inputs tried previously with this profile were
    # DatasetUtils.syntheticDataset1() with default arguments,
    # DatasetUtils.syntheticDataset1(u=0.2, sd=0.2) and DatasetUtils.flixster().
    # NOTE: the names X and maxLocalAuc must not be renamed -- the statement
    # string handed to ProfileUtils.profile refers to them via locals().
    X, U, V = DatasetUtils.syntheticDataset1(u=0.01, m=1000, n=2000)

    # Constructor arguments.
    u = 0.2
    w = 1 - u
    eps = 10**-6
    alpha = 0.5
    maxLocalAuc = MaxLocalAUC(
        self.k,
        w,
        alpha=alpha,
        eps=eps,
        stochastic=True,
    )

    # Norm bounds and iteration budget.
    maxLocalAuc.maxNormU = 10
    maxLocalAuc.maxNormV = 10
    maxLocalAuc.maxIterations = 100

    # Initialisation, learning-rate schedule and loss.
    maxLocalAuc.initialAlg = "rand"
    maxLocalAuc.rate = "constant"
    maxLocalAuc.scaleAlpha = False
    maxLocalAuc.loss = "hinge"

    # Parallelism.
    maxLocalAuc.parallelSGD = True
    maxLocalAuc.numProcesses = 8

    # Sampling sizes and validation split.
    maxLocalAuc.numAucSamples = 10
    maxLocalAuc.numRowSamples = 30
    maxLocalAuc.validationUsers = 0.0

    print(maxLocalAuc)

    ProfileUtils.profile('maxLocalAuc.learnModel(X)', globals(), locals())
def testOverfit(self):
    """
    See if we can get a zero objective on the hinge loss.

    A small sparse binary matrix is generated, a MaxLocalAUC model with no
    validation users and lmbda = 0 is trained on it for up to 500
    iterations, and the first column of the last row of the returned
    training measures is required to be 0 to 3 decimal places.
    """
    m = 10
    n = 20
    u = 0.5
    w = 1 - u

    # The data is generated with k = 5 while the model below uses k = 10;
    # separate names make it clear which value goes where.
    dataK = 5
    modelK = 10

    X = SparseUtils.generateSparseBinaryMatrix((m, n), dataK, w, csarray=True)

    eps = 0.001
    # The other MaxLocalAUC call sites in this file pass w = 1 - u as the
    # second argument. Here u == w == 0.5, so the value passed is unchanged.
    maxLocalAuc = MaxLocalAUC(modelK, w, eps=eps, stochastic=True)
    maxLocalAuc.rate = "constant"
    maxLocalAuc.maxIterations = 500
    maxLocalAuc.numProcesses = 1
    maxLocalAuc.loss = "hinge"
    maxLocalAuc.validationUsers = 0
    maxLocalAuc.lmbda = 0

    print("Overfit example")
    # learnModel returns six values when verbose=True; only the training
    # measures are inspected. Placeholders avoid shadowing the ``time`` module.
    _, _, trainMeasures, _, _, _ = maxLocalAuc.learnModel(X, verbose=True)

    # assertAlmostEquals is a deprecated alias that was removed in
    # Python 3.12; assertAlmostEqual is the supported spelling.
    self.assertAlmostEqual(trainMeasures[-1, 0], 0, places=3)
def testOverfit(self):
    """
    See if we can get a zero objective on the hinge loss.

    Generates a small sparse binary matrix, fits MaxLocalAUC to it with
    validationUsers = 0 and lmbda = 0 (at most 500 iterations, a single
    process), and checks that the final training measure in column 0 is 0
    to 3 decimal places.
    """
    m = 10
    n = 20
    u = 0.5
    w = 1 - u

    # k = 5 is used to generate the data and k = 10 for the model; they are
    # kept under distinct names instead of reassigning one variable.
    dataK = 5
    modelK = 10

    X = SparseUtils.generateSparseBinaryMatrix((m, n), dataK, w, csarray=True)

    eps = 0.001
    # Pass w as the second argument, matching the other MaxLocalAUC call
    # sites in this file. With u = 0.5, w equals u, so the value is the same.
    maxLocalAuc = MaxLocalAUC(modelK, w, eps=eps, stochastic=True)
    maxLocalAuc.rate = "constant"
    maxLocalAuc.maxIterations = 500
    maxLocalAuc.numProcesses = 1
    maxLocalAuc.loss = "hinge"
    maxLocalAuc.validationUsers = 0
    maxLocalAuc.lmbda = 0

    print("Overfit example")
    # Six values come back when verbose=True; only trainMeasures is used.
    # Placeholder names keep the ``time`` module name from being shadowed.
    _, _, trainMeasures, _, _, _ = maxLocalAuc.learnModel(X, verbose=True)

    # Use assertAlmostEqual: the assertAlmostEquals alias is deprecated and
    # no longer exists from Python 3.12 onwards.
    self.assertAlmostEqual(trainMeasures[-1, 0], 0, places=3)
U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(trainX, U=U, V=V, verbose=True) fprTrain, tprTrain = MCEvaluator.averageRocCurve(trainX, U, V) fprTest, tprTest = MCEvaluator.averageRocCurve(testX, U, V) return fprTrain, tprTrain, fprTest, tprTest if saveResults: paramList = [] chunkSize = 1 U, V = maxLocalAuc.initUV(X) for loss, rho in losses: for trainX, testX in trainTestXs: maxLocalAuc.loss = loss maxLocalAuc.rho = rho paramList.append((trainX, testX, maxLocalAuc.copy(), U.copy(), V.copy())) pool = multiprocessing.Pool(maxtasksperchild=100, processes=multiprocessing.cpu_count()) resultsIterator = pool.imap(computeTestAuc, paramList, chunkSize) #import itertools #resultsIterator = itertools.imap(computeTestAuc, paramList) meanFprTrains = [] meanTprTrains = [] meanFprTests = [] meanTprTests = [] for loss in losses:
# Build a MaxLocalAUC learner and set its attributes. The attributes are
# grouped below into scalar settings and array-valued settings; within each
# group they are listed alphabetically.
u = 0.1
w = 1 - u
k2 = 64
eps = 10**-6
maxLocalAuc = MaxLocalAUC(k2, w, eps=eps, stochastic=True)

# --- Scalar settings ---
maxLocalAuc.alpha = 0.1
maxLocalAuc.folds = 1
maxLocalAuc.initialAlg = "rand"
maxLocalAuc.itemExpP = 0.0
maxLocalAuc.itemExpQ = 0.0
maxLocalAuc.lmbdaU = 0.0
maxLocalAuc.lmbdaV = 0.0
maxLocalAuc.loss = "hinge"
maxLocalAuc.maxIterations = 500
maxLocalAuc.metric = "f1"
maxLocalAuc.normalise = True
maxLocalAuc.numAucSamples = 10
# Use every CPU that multiprocessing reports.
maxLocalAuc.numProcesses = multiprocessing.cpu_count()
maxLocalAuc.numRecordAucSamples = 100
maxLocalAuc.numRowSamples = 30
maxLocalAuc.rate = "constant"
maxLocalAuc.recordStep = 10
maxLocalAuc.rho = 1.0
maxLocalAuc.t0 = 1.0
maxLocalAuc.validationSize = 3
maxLocalAuc.validationUsers = 0

# --- Array-valued settings ---
# Presumably candidate values for model selection -- TODO confirm against
# the code that reads these attributes.
maxLocalAuc.alphas = 2.0 ** -numpy.arange(5)            # 2^0 .. 2^-4
maxLocalAuc.ks = numpy.array([k2])
maxLocalAuc.lmbdas = 2.0 ** -numpy.arange(8)            # 2^0 .. 2^-7
maxLocalAuc.maxNorms = 2.0 ** numpy.arange(-2, 5, 0.5)  # 2^-2 .. 2^4.5
maxLocalAuc.t0s = 2.0 ** -numpy.arange(7, 12)           # 2^-7 .. 2^-11