def localAUCApprox(positiveArray, U, V, w, numAucSamples=50, r=None, allArray=None):
    """
    Compute the estimated local AUC for the score functions UV^T relative to X
    with quantile w. The AUC is computed using positiveArray, which is a tuple
    (indPtr, colInds), assuming allArray is None. If allArray is not None then
    positive items are chosen from positiveArray and negative items are drawn
    from the complement of allArray.
    """
    if type(positiveArray) != tuple:
        positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

    indPtr, colInds = positiveArray
    U = numpy.ascontiguousarray(U)
    V = numpy.ascontiguousarray(V)

    if r is None:
        r = SparseUtilsCython.computeR(U, V, w, numAucSamples)

    if allArray is None:
        return MCEvaluatorCython.localAUCApprox(indPtr, colInds, indPtr, colInds, U, V, numAucSamples, r)
    else:
        allIndPtr, allColInd = allArray
        return MCEvaluatorCython.localAUCApprox(indPtr, colInds, allIndPtr, allColInd, U, V, numAucSamples, r)
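# A minimal pure-numpy sketch of the quantity localAUCApprox estimates, for
# reference: per user, the fraction of (positive, negative) item pairs where
# the positive item both outscores the negative one and exceeds a per-user
# threshold r[i] taken at quantile w of the scores. localAUCReference is a
# hypothetical helper for illustration only; the Cython version samples pairs
# rather than enumerating them, and computeR defines r slightly differently.
import numpy

def localAUCReference(X, U, V, w):
    m, n = X.shape
    S = U.dot(V.T)
    # per-user score threshold at quantile w (assumption for this sketch)
    r = numpy.percentile(S, w*100, axis=1)
    aucs = numpy.zeros(m)

    for i in range(m):
        pos = numpy.where(X[i, :] != 0)[0]
        neg = numpy.where(X[i, :] == 0)[0]
        if pos.shape[0] == 0 or neg.shape[0] == 0:
            continue
        hits = 0
        for p in pos:
            # count pairs where the positive beats the negative and the threshold
            hits += numpy.sum((S[i, p] > S[i, neg]) & (S[i, p] > r[i]))
        aucs[i] = hits/float(pos.shape[0]*neg.shape[0])

    return aucs.mean()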
def f1AtK(positiveArray, orderedItems, k, verbose=False):
    """
    Return the F1@k measure for each row of the predicted matrix UV.T using
    real values in positiveArray. positiveArray is a tuple (indPtr, colInds).

    :param orderedItems: The ordered items for each user (users are rows, items are cols)
    :param verbose: If true return the F1 and first k recommendations for each row, otherwise the mean F1
    """
    if type(positiveArray) != tuple:
        positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

    orderedItems = orderedItems[:, 0:k]
    indPtr, colInds = positiveArray

    precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds, orderedItems)
    recalls = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)

    denominator = precisions + recalls
    denominator += denominator == 0  # avoid division by zero when precision = recall = 0

    f1s = 2*precisions*recalls/denominator

    if verbose:
        return f1s, orderedItems
    else:
        return f1s.mean()
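# A hedged pure-numpy reference for the F1@k computed above, assuming
# relevantSets[i] is the set of positive items for user i and orderedItems
# holds the per-row recommendations. f1AtKReference is illustrative only; it
# uses the common convention recall = hits/|relevant|, which may differ in
# edge cases from the Cython recallAtk.
import numpy

def f1AtKReference(relevantSets, orderedItems, k):
    m = orderedItems.shape[0]
    f1s = numpy.zeros(m)

    for i in range(m):
        topk = orderedItems[i, 0:k]
        numHits = len(set(topk).intersection(relevantSets[i]))
        precision = numHits/float(k)
        recall = numHits/float(max(len(relevantSets[i]), 1))
        if precision + recall > 0:
            f1s[i] = 2*precision*recall/(precision + recall)

    return f1s.mean()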
def testStratifiedRecallAtk(self):
    m = 20
    n = 50
    r = 3
    alpha = 1
    X, U, V = SparseUtilsCython.generateSparseBinaryMatrixPL((m, n), r, density=0.2, alpha=alpha, csarray=True)
    itemCounts = numpy.array(X.sum(0) + 1, numpy.int32)

    (indPtr, colInds) = X.nonzeroRowsPtr()
    indPtr = numpy.array(indPtr, numpy.uint32)
    colInds = numpy.array(colInds, numpy.uint32)

    k = 5
    orderedItems = numpy.random.randint(0, n, m*k)
    orderedItems = numpy.reshape(orderedItems, (m, k))
    orderedItems = numpy.array(orderedItems, numpy.int32)

    beta = 0.5
    recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(indPtr, colInds, orderedItems, itemCounts, beta)

    #Now compute recalls from scratch
    recalls2 = numpy.zeros(m)

    for i in range(m):
        omegai = colInds[indPtr[i]:indPtr[i+1]]
        numerator = 0
        for j in range(k):
            if orderedItems[i, j] in omegai:
                numerator += 1/itemCounts[orderedItems[i, j]]**beta

        denominator = 0
        for j in omegai:
            denominator += 1/itemCounts[j]**beta

        recalls2[i] = numerator/denominator

    nptst.assert_array_equal(recalls, recalls2)

    #Now try to match with normal recall
    itemCounts = numpy.ones(n, numpy.int32)
    recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(indPtr, colInds, orderedItems, itemCounts, beta)
    recalls2 = MCEvaluatorCython.recallAtk(indPtr, colInds, orderedItems)
    nptst.assert_array_equal(recalls, recalls2)
def stratifiedRecallAtK(positiveArray, orderedItems, k, itemCounts, beta=0.5, verbose=False):
    """
    Compute the stratified recall@k score for each row of the predicted matrix
    UV.T using real values in positiveArray. positiveArray is a tuple (indPtr, colInds).

    :param orderedItems: The ordered items for each user (users are rows, items are cols)
    :param verbose: If true return the recall and first k recommendations for each row, otherwise the weighted average recall
    """
    if type(positiveArray) != tuple:
        positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

    orderedItems = orderedItems[:, 0:k]
    indPtr, colInds = positiveArray

    recalls, denominators = MCEvaluatorCython.stratifiedRecallAtk(indPtr, colInds, orderedItems, itemCounts, beta)

    if verbose:
        return recalls, orderedItems
    else:
        return numpy.average(recalls, weights=denominators)
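# A pure-numpy sketch of the per-row stratified recall used above: hits and
# positives are weighted by 1/itemCounts[j]**beta, so recommending rare items
# counts for more than recommending popular ones. This mirrors the
# from-scratch loop in the unit test; stratifiedRecallReference is
# illustrative only, not part of MCEvaluatorCython.
import numpy

def stratifiedRecallReference(relevantSets, orderedItems, itemCounts, beta):
    m = orderedItems.shape[0]
    recalls = numpy.zeros(m)
    denominators = numpy.zeros(m)

    for i in range(m):
        # every positive item contributes 1/count**beta to the denominator
        denominators[i] = numpy.sum(1.0/itemCounts[list(relevantSets[i])]**beta)
        # only recommended positives contribute to the numerator
        numerator = sum(1.0/itemCounts[j]**beta for j in orderedItems[i, :] if j in relevantSets[i])
        recalls[i] = numerator/denominators[i]

    return recalls, denominators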
def testReciprocalRankAtk(self):
    m = 20
    n = 50
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True, csarray=True)

    k = 5
    orderedItems = numpy.random.randint(0, n, m*k)
    orderedItems = numpy.reshape(orderedItems, (m, k))
    orderedItems = numpy.array(orderedItems, numpy.int32)

    (indPtr, colInds) = X.nonzeroRowsPtr()
    indPtr = numpy.array(indPtr, numpy.uint32)
    colInds = numpy.array(colInds, numpy.uint32)

    rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds, orderedItems)

    rrs2 = numpy.zeros(m)
    for i in range(m):
        omegai = colInds[indPtr[i]:indPtr[i+1]]
        for j in range(k):
            if orderedItems[i, j] in omegai:
                rrs2[i] = 1/float(1+j)
                break

    nptst.assert_array_equal(rrs, rrs2)

    #Test case where no items are in ranking
    orderedItems = numpy.ones((m, k), numpy.int32) * (n+1)
    rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds, orderedItems)
    nptst.assert_array_equal(rrs, numpy.zeros(m))

    #Now, make all items rank 2
    for i in range(m):
        omegai = colInds[indPtr[i]:indPtr[i+1]]
        orderedItems[i, 1] = omegai[0]

    rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds, orderedItems)
    nptst.assert_array_equal(rrs, numpy.ones(m)*0.5)
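# A compact numpy sketch of reciprocal rank at k, matching the from-scratch
# loop in the test above: 1/(rank of the first relevant item), or 0 if none
# of the top k items are relevant. reciprocalRankReference is illustrative
# only; averaging its output over rows gives MRR@k.
import numpy

def reciprocalRankReference(relevantSets, orderedItems, k):
    m = orderedItems.shape[0]
    rrs = numpy.zeros(m)

    for i in range(m):
        for j in range(k):
            if orderedItems[i, j] in relevantSets[i]:
                rrs[i] = 1.0/(j + 1)
                break

    return rrs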
def testRecommendAtk(self):
    m = 20
    n = 50
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
    import sppy
    X = sppy.csarray(X)

    k = 10
    X = numpy.zeros(X.shape)
    omegaList = []
    for i in range(m):
        omegaList.append(numpy.random.permutation(n)[0:5])
        X[i, omegaList[i]] = 1
    X = sppy.csarray(X)

    orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
    orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)

    nptst.assert_array_equal(orderedItems[orderedItems2 != -1], orderedItems2[orderedItems2 != -1])

    #Recommendations should exclude the observed (training) items
    for i in range(m):
        items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
        self.assertEqual(items.shape[0], 0)

        #items = numpy.union1d(omegaList[i], orderedItems[i, :])
        #items = numpy.intersect1d(items, orderedItems2[i, :])
        #nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))

    #Now let's have an all zeros X
    X = sppy.csarray(X.shape)
    orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
    orderedItems2 = MCEvaluator.recommendAtk(U, V, k)
    nptst.assert_array_equal(orderedItems, orderedItems2)
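# A hedged numpy reference for the masked top-k recommendation tested above:
# score everything with U V^T, push already-observed (training) items to
# -inf, then take the k highest-scoring items per row. recommendAtkReference
# is illustrative only; it assumes trainX exposes nonzero() returning
# (row, col) index arrays, as dense numpy and scipy sparse matrices do.
import numpy

def recommendAtkReference(U, V, k, trainX):
    S = U.dot(V.T)
    # mask out training items so they are never recommended
    S[trainX.nonzero()] = -numpy.inf
    return numpy.argsort(-S, axis=1)[:, 0:k]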
def learnPredictRanking(args):
    """
    Train on a training set and test on a test set, for a number of values of rho.
    """
    learner, trainX, testX, rhos = args
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner)

    testInds = testX.nonzero()
    trainXIter = []
    testIndList = []

    for rho in rhos:
        trainXIter.append(trainX)
        testIndList.append(testInds)

    trainXIter = iter(trainXIter)

    ZIter = learner.learnModel(trainXIter, iter(rhos))
    metrics = numpy.zeros(rhos.shape[0])

    for j, Z in enumerate(ZIter):
        U, s, V = Z
        U = U*s
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, learner.recommendSize, trainX)

        if learner.metric == "mrr":
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)
            logging.debug("MRR@" + str(learner.recommendSize) + ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1":
            metrics[j] = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)
            logging.debug("F1@" + str(learner.recommendSize) + ": " + str('%.4f' % metrics[j]) + " " + str(learner))
        else:
            raise ValueError("Unknown metric " + learner.metric)

        gc.collect()

    return metrics
def precisionAtK(positiveArray, orderedItems, k, verbose=False):
    """
    Compute the average precision@k score for each row of the predicted matrix
    UV.T using real values in positiveArray. positiveArray is a tuple (indPtr, colInds).

    :param orderedItems: The ordered items for each user (users are rows, items are cols)
    :param verbose: If true return the precision and first k recommendations for each row, otherwise the mean precision
    """
    if type(positiveArray) != tuple:
        positiveArray = SparseUtils.getOmegaListPtr(positiveArray)

    orderedItems = orderedItems[:, 0:k]
    indPtr, colInds = positiveArray

    precisions = MCEvaluatorCython.precisionAtk(indPtr, colInds, orderedItems)

    if verbose:
        return precisions, orderedItems
    else:
        return precisions.mean()
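# Precision@k reduces to |top-k intersect relevant|/k per row; a one-loop
# numpy reference matching the intent of precisionAtk above.
# precisionAtKReference is illustrative only, not part of MCEvaluatorCython.
import numpy

def precisionAtKReference(relevantSets, orderedItems, k):
    m = orderedItems.shape[0]
    precisions = numpy.zeros(m)

    for i in range(m):
        precisions[i] = len(set(orderedItems[i, 0:k]).intersection(relevantSets[i]))/float(k)

    return precisions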
def computeTestMRR(args):
    """
    A simple function for outputting MRR for a learner, e.g. in conjunction
    with parallel model selection.
    """
    trainX, testX, learner = args
    learner.learnModel(trainX)

    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V, learner.recommendSize, trainX)
    mrr = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except:
        learnerStr = str(learner)

    logging.debug("MRR@" + str(learner.recommendSize) + ": " + str('%.4f' % mrr) + " " + learnerStr)
    return mrr
def computeTestF1(args):
    """
    A simple function for outputting F1 for a learner, e.g. in conjunction
    with parallel model selection.
    """
    trainX, testX, learner = args
    learner.learnModel(trainX)

    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V, learner.recommendSize, trainX)
    f1 = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except:
        learnerStr = str(learner)

    logging.debug("F1@" + str(learner.recommendSize) + ": " + str('%.4f' % f1) + " " + learnerStr)
    return f1
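# A hedged sketch of how computeTestF1 (or computeTestMRR) might be driven
# during parallel model selection, as the docstrings suggest. It assumes each
# learner in `learners` exposes learnModel/U/V/recommendSize as used above
# and is picklable; parallelModelSelect is a hypothetical driver, not part of
# the library.
import multiprocessing

def parallelModelSelect(trainX, testX, learners, processes=4):
    # one (trainX, testX, learner) tuple per candidate model
    paramList = [(trainX, testX, learner) for learner in learners]
    pool = multiprocessing.Pool(processes=processes)
    f1s = pool.map(computeTestF1, paramList)
    pool.close()
    pool.join()
    return f1s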
def recordResults(self, X, trainX, testX, learner, fileName):
    """
    Save results for a particular recommendation.
    """
    if self.algoArgs.skipRecordResults:
        logging.debug("Skipping final evaluation of algorithm")
        return

    allTrainMeasures = []
    allTestMeasures = []
    allMetaData = []

    for i in range(self.algoArgs.recordFolds):
        metaData = []
        w = 1 - self.algoArgs.u
        logging.debug("Computing recommendation errors")
        maxItems = self.ps[-1]

        start = time.time()
        if type(learner) == IterativeSoftImpute:
            trainIterator = iter([trainX])
            ZList = learner.learnModel(trainIterator)
            U, s, V = ZList.next()
            U = U*s

            #trainX = sppy.csarray(trainX)
            #testX = sppy.csarray(testX)

            U = numpy.ascontiguousarray(U)
            V = numpy.ascontiguousarray(V)
        else:
            learner.learnModel(trainX)
            U = learner.U
            V = learner.V

        learnTime = time.time() - start
        metaData.append(learnTime)

        logging.debug("Getting all omega")
        allOmegaPtr = SparseUtils.getOmegaListPtr(X)
        logging.debug("Getting train omega")
        trainOmegaPtr = SparseUtils.getOmegaListPtr(trainX)
        logging.debug("Getting test omega")
        testOmegaPtr = SparseUtils.getOmegaListPtr(testX)
        logging.debug("Getting recommendations")

        trainOrderedItems = MCEvaluator.recommendAtk(U, V, maxItems)
        testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, maxItems, trainX)

        colNames = []
        trainMeasures = []
        testMeasures = []

        for p in self.ps:
            trainMeasures.append(MCEvaluator.precisionAtK(trainOmegaPtr, trainOrderedItems, p))
            testMeasures.append(MCEvaluator.precisionAtK(testOmegaPtr, testOrderedItems, p))
            colNames.append("precision@" + str(p))

        for p in self.ps:
            trainMeasures.append(MCEvaluator.recallAtK(trainOmegaPtr, trainOrderedItems, p))
            testMeasures.append(MCEvaluator.recallAtK(testOmegaPtr, testOrderedItems, p))
            colNames.append("recall@" + str(p))

        for p in self.ps:
            trainMeasures.append(MCEvaluator.f1AtK(trainOmegaPtr, trainOrderedItems, p))
            testMeasures.append(MCEvaluator.f1AtK(testOmegaPtr, testOrderedItems, p))
            colNames.append("f1@" + str(p))

        for p in self.ps:
            trainMeasures.append(MCEvaluator.mrrAtK(trainOmegaPtr, trainOrderedItems, p))
            testMeasures.append(MCEvaluator.mrrAtK(testOmegaPtr, testOrderedItems, p))
            colNames.append("mrr@" + str(p))

        try:
            r = SparseUtilsCython.computeR(U, V, w, self.algoArgs.numRecordAucSamples)
            trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, r=r))
            testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, allArray=allOmegaPtr, r=r))

            w = 0.0
            r = SparseUtilsCython.computeR(U, V, w, self.algoArgs.numRecordAucSamples)
            trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, r=r))
            testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, allArray=allOmegaPtr, r=r))

            colNames.append("LAUC@" + str(self.algoArgs.u))
            colNames.append("AUC")
        except:
            logging.debug("Could not compute AUCs")
            raise

        trainMeasures = numpy.array(trainMeasures)
        testMeasures = numpy.array(testMeasures)
        metaData = numpy.array(metaData)

        allTrainMeasures.append(trainMeasures)
        allTestMeasures.append(testMeasures)
        allMetaData.append(metaData)

    allTrainMeasures = numpy.array(allTrainMeasures)
    allTestMeasures = numpy.array(allTestMeasures)
    allMetaData = numpy.array(allMetaData)

    meanTrainMeasures = numpy.mean(allTrainMeasures, 0)
    meanTestMeasures = numpy.mean(allTestMeasures, 0)
    meanMetaData = numpy.mean(allMetaData, 0)

    logging.debug("Mean metrics")
    for i, colName in enumerate(colNames):
        logging.debug(colName + ":" + str('%.4f' % meanTrainMeasures[i]) + "/" + str('%.4f' % meanTestMeasures[i]))

    numpy.savez(fileName, meanTrainMeasures, meanTestMeasures, meanMetaData, trainOrderedItems, testOrderedItems)
    logging.debug("Saved file as " + fileName)
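# numpy.savez with positional arguments stores the arrays under the keys
# arr_0, arr_1, ... in the order passed above, so the saved results can be
# reloaded like this ("results.npz" is a placeholder file name):
import numpy

data = numpy.load("results.npz")
meanTrainMeasures = data["arr_0"]
meanTestMeasures = data["arr_1"]
meanMetaData = data["arr_2"]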
def recordResults(self, muU, muV, trainMeasures, testMeasures, loopInd, rowSamples, indPtr, colInds, testIndPtr, testColInds, allIndPtr, allColInds, gi, gp, gq, trainX, startTime):
    sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
    sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])
    r = SparseUtilsCython.computeR(muU, muV, self.w, self.numRecordAucSamples)
    objArr = self.objectiveApprox((indPtr, colInds), muU, muV, r, gi, gp, gq, full=True)

    if trainMeasures is None:
        trainMeasures = []
    trainMeasures.append([objArr.sum(), MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w, self.numRecordAucSamples, r), time.time() - startTime, loopInd])

    printStr = "iter " + str(loopInd) + ":"
    printStr += " sigmaU=" + str('%.4f' % sigmaU)
    printStr += " sigmaV=" + str('%.4f' % sigmaV)
    printStr += " train: obj~" + str('%.4f' % trainMeasures[-1][0])
    printStr += " LAUC~" + str('%.4f' % trainMeasures[-1][1])

    if testIndPtr is not None:
        testMeasuresRow = []
        testMeasuresRow.append(self.objectiveApprox((testIndPtr, testColInds), muU, muV, r, gi, gp, gq, allArray=(allIndPtr, allColInds)))
        testMeasuresRow.append(MCEvaluator.localAUCApprox((testIndPtr, testColInds), muU, muV, self.w, self.numRecordAucSamples, r, allArray=(allIndPtr, allColInds)))
        testOrderedItems = MCEvaluatorCython.recommendAtk(muU, muV, numpy.max(self.recommendSize), trainX)
        printStr += " validation: obj~" + str('%.4f' % testMeasuresRow[0])
        printStr += " LAUC~" + str('%.4f' % testMeasuresRow[1])

        #recommendSize may be a single int or an array of cutoffs
        try:
            for p in self.recommendSize:
                f1Array, orderedItems = MCEvaluator.f1AtK((testIndPtr, testColInds), testOrderedItems, p, verbose=True)
                testMeasuresRow.append(f1Array[rowSamples].mean())
        except TypeError:
            f1Array, orderedItems = MCEvaluator.f1AtK((testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True)
            testMeasuresRow.append(f1Array[rowSamples].mean())
        printStr += " f1@" + str(self.recommendSize) + "=" + str('%.4f' % testMeasuresRow[-1])

        try:
            for p in self.recommendSize:
                mrr, orderedItems = MCEvaluator.mrrAtK((testIndPtr, testColInds), testOrderedItems, p, verbose=True)
                testMeasuresRow.append(mrr[rowSamples].mean())
        except TypeError:
            mrr, orderedItems = MCEvaluator.mrrAtK((testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True)
            testMeasuresRow.append(mrr[rowSamples].mean())
        printStr += " mrr@" + str(self.recommendSize) + "=" + str('%.4f' % testMeasuresRow[-1])

        testMeasures.append(testMeasuresRow)

    printStr += " ||U||=" + str('%.3f' % numpy.linalg.norm(muU))
    printStr += " ||V||=" + str('%.3f' % numpy.linalg.norm(muV))

    if self.bound:
        trainObj = objArr.sum()
        expectationBound = self.computeBound(trainX, muU, muV, trainObj, self.delta)
        printStr += " bound=" + str('%.3f' % expectationBound)
        trainMeasures[-1].append(expectationBound)

    return printStr
os.system('taskset -p 0xffffffff %d' % os.getpid())

logging.debug("Starting training")
logging.debug(maxLocalAuc)

#modelSelectX = trainX[0:100, :]
#maxLocalAuc.learningRateSelect(trainX)
#maxLocalAuc.modelSelect(trainX)
#ProfileUtils.profile('U, V, trainObjs, trainAucs, testObjs, testAucs, iterations, time = maxLocalAuc.learnModel(trainX, testX=testX, verbose=True)', globals(), locals())

U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(trainX, verbose=True)

p = 10
trainOrderedItems = MCEvaluator.recommendAtk(U, V, p)
testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, p, trainX)

r = SparseUtilsCython.computeR(U, V, maxLocalAuc.w, maxLocalAuc.numRecordAucSamples)
trainObjVec = maxLocalAuc.objectiveApprox(trainOmegaPtr, U, V, r, maxLocalAuc.gi, maxLocalAuc.gp, maxLocalAuc.gq, full=True)
testObjVec = maxLocalAuc.objectiveApprox(testOmegaPtr, U, V, r, maxLocalAuc.gi, maxLocalAuc.gp, maxLocalAuc.gq, allArray=allOmegaPtr, full=True)

itemCounts = numpy.array(X.sum(0) + 1, numpy.int32)
beta = 0.5

for p in [1, 3, 5, 10]:
    trainPrecision = MCEvaluator.precisionAtK(trainOmegaPtr, trainOrderedItems, p)
    testPrecision = MCEvaluator.precisionAtK(testOmegaPtr, testOrderedItems, p)
    logging.debug("Train/test precision@" + str(p) + "=" + str(trainPrecision) + "/" + str(testPrecision))

for p in [1, 3, 5, 10]:
    trainRecall = MCEvaluator.stratifiedRecallAtK(trainOmegaPtr, trainOrderedItems, p, itemCounts, beta)
    testRecall = MCEvaluator.stratifiedRecallAtK(testOmegaPtr, testOrderedItems, p, itemCounts, beta)
    logging.debug("Train/test stratified recall@" + str(p) + "=" + str(trainRecall) + "/" + str(testRecall))