def testF1Atk(self):
    m = 10
    n = 5
    r = 3
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)

    import sppy
    X = sppy.csarray(X)

    orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
    # With k = n every item is recommended, so each row's r relevant items are all retrieved:
    # recall = 1 and precision = r/n, giving F1 = 2*(r/n)*1 / ((r/n) + 1) = 2*(r/n) / (1 + r/n).
    self.assertAlmostEquals(MCEvaluator.f1AtK(X, orderedItems, n, verbose=False), 2 * r / float(n) / (1 + r / float(n)))

    m = 20
    n = 50
    X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)

    k = 5
    orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)

    f1s = numpy.zeros(m)
    for i in range(m):
        f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

    orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
    f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

    nptst.assert_array_equal(f1s, f1s2)

    # Test case where we get a zero precision or recall
    orderedItems[5, :] = -1
    precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
    recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)

    f1s = numpy.zeros(m)
    for i in range(m):
        if precision[i] + recall[i] != 0:
            f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

    f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

    nptst.assert_array_equal(f1s, f1s2)
def computeTestF1(args):
    """
    A simple function for outputting the F1 score of a learner, e.g. for use with
    parallel model selection.
    """
    trainX, testX, learner = args
    learner.learnModel(trainX)

    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V, learner.recommendSize, trainX)
    f1 = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except:
        learnerStr = str(learner)

    logging.debug("F1@" + str(learner.recommendSize) + ": " + str("%.4f" % f1) + " " + learnerStr)

    return f1
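# A minimal sketch (not part of the original module) of driving computeTestF1 for parallel
# model selection, as its docstring suggests. The names `learners`, `trainX`, `testX` and
# `numProcesses` are illustrative assumptions: `learners` is a list of configured, picklable
# recommenders (each with recommendSize set) and (trainX, testX) is a train/test split.
def parallelTestF1(trainX, testX, learners, numProcesses=4):
    import multiprocessing

    # One (trainX, testX, learner) tuple per candidate model, evaluated in parallel
    paramList = [(trainX, testX, learner) for learner in learners]
    pool = multiprocessing.Pool(processes=numProcesses)
    f1s = pool.map(computeTestF1, paramList)
    pool.close()
    pool.join()

    return f1s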
def recordResults(self, muU, muV, trainMeasures, testMeasures, loopInd, rowSamples, indPtr, colInds,
                  testIndPtr, testColInds, allIndPtr, allColInds, gi, gp, gq, trainX, startTime):
    sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
    sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])
    r = SparseUtilsCython.computeR(muU, muV, self.w, self.numRecordAucSamples)
    objArr = self.objectiveApprox((indPtr, colInds), muU, muV, r, gi, gp, gq, full=True)

    if trainMeasures is None:
        trainMeasures = []
    trainMeasures.append([objArr.sum(),
                          MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w, self.numRecordAucSamples, r),
                          time.time() - startTime,
                          loopInd])

    printStr = "iter " + str(loopInd) + ":"
    printStr += " sigmaU=" + str("%.4f" % sigmaU)
    printStr += " sigmaV=" + str("%.4f" % sigmaV)
    printStr += " train: obj~" + str("%.4f" % trainMeasures[-1][0])
    printStr += " LAUC~" + str("%.4f" % trainMeasures[-1][1])

    if testIndPtr is not None:
        testMeasuresRow = []
        testMeasuresRow.append(self.objectiveApprox((testIndPtr, testColInds), muU, muV, r, gi, gp, gq, allArray=(allIndPtr, allColInds)))
        testMeasuresRow.append(MCEvaluator.localAUCApprox((testIndPtr, testColInds), muU, muV, self.w, self.numRecordAucSamples, r, allArray=(allIndPtr, allColInds)))
        testOrderedItems = MCEvaluatorCython.recommendAtk(muU, muV, numpy.max(self.recommendSize), trainX)

        printStr += " validation: obj~" + str("%.4f" % testMeasuresRow[0])
        printStr += " LAUC~" + str("%.4f" % testMeasuresRow[1])

        # recommendSize may be a single integer or an array of cutoffs; iterating over an
        # integer raises an exception, in which case we fall back to the scalar case.
        try:
            for p in self.recommendSize:
                f1Array, orderedItems = MCEvaluator.f1AtK((testIndPtr, testColInds), testOrderedItems, p, verbose=True)
                testMeasuresRow.append(f1Array[rowSamples].mean())
        except:
            f1Array, orderedItems = MCEvaluator.f1AtK((testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True)
            testMeasuresRow.append(f1Array[rowSamples].mean())
        printStr += " f1@" + str(self.recommendSize) + "=" + str("%.4f" % testMeasuresRow[-1])

        try:
            for p in self.recommendSize:
                mrr, orderedItems = MCEvaluator.mrrAtK((testIndPtr, testColInds), testOrderedItems, p, verbose=True)
                testMeasuresRow.append(mrr[rowSamples].mean())
        except:
            mrr, orderedItems = MCEvaluator.mrrAtK((testIndPtr, testColInds), testOrderedItems, self.recommendSize, verbose=True)
            testMeasuresRow.append(mrr[rowSamples].mean())
        printStr += " mrr@" + str(self.recommendSize) + "=" + str("%.4f" % testMeasuresRow[-1])

        testMeasures.append(testMeasuresRow)

    printStr += " ||U||=" + str("%.3f" % numpy.linalg.norm(muU))
    printStr += " ||V||=" + str("%.3f" % numpy.linalg.norm(muV))

    if self.bound:
        trainObj = objArr.sum()
        expectationBound = self.computeBound(trainX, muU, muV, trainObj, self.delta)
        printStr += " bound=" + str("%.3f" % expectationBound)
        trainMeasures[-1].append(expectationBound)

    return printStr
# Look at the distribution of train and test objectives
plt.figure(5)
# Note: recent NumPy versions drop the normed argument; density=True gives the same result
# here since the bins are uniform.
hist, edges = numpy.histogram(trainObjVec, bins=50, normed=True)
xvals = (edges[0:-1] + edges[1:]) / 2
plt.plot(xvals, hist, label="train")

hist, e = numpy.histogram(testObjVec, bins=edges, normed=True)
xvals = (edges[0:-1] + edges[1:]) / 2
plt.plot(xvals, hist, label="test")
plt.legend()

plt.figure(6)
plt.scatter(trainObjVec, trainX.sum(1))

# Look at the F1 scores and how they correlate with row counts and objectives
f1s, orderedItems = MCEvaluator.f1AtK(testOmegaPtr, testOrderedItems, maxLocalAuc.recommendSize, verbose=True)

uniqp, inverse = numpy.unique(f1s, return_inverse=True)
print(uniqp, numpy.bincount(inverse))

numItems = trainX.sum(1)
print(numpy.corrcoef(numItems, f1s))
print(numpy.corrcoef(trainObjVec, f1s))
print(numpy.corrcoef(testObjVec, f1s))

#fprTrain, tprTrain = MCEvaluator.averageRocCurve(trainX, U, V)
#fprTest, tprTest = MCEvaluator.averageRocCurve(testX, U, V)
#
#plt.figure(7)
#plt.plot(fprTrain, tprTrain, label="train")
#plt.plot(fprTest, tprTest, label="test")
#plt.xlabel("mean false positive rate")
def recordResults(self, X, trainX, testX, learner, fileName):
    """
    Save results for a particular recommendation
    """
    if self.algoArgs.skipRecordResults:
        logging.debug("Skipping final evaluation of algorithm")
        return

    allTrainMeasures = []
    allTestMeasures = []
    allMetaData = []

    for i in range(self.algoArgs.recordFolds):
        metaData = []
        w = 1 - self.algoArgs.u
        logging.debug("Computing recommendation errors")
        maxItems = self.ps[-1]

        start = time.time()
        if type(learner) == IterativeSoftImpute:
            trainIterator = iter([trainX])
            ZList = learner.learnModel(trainIterator)
            U, s, V = ZList.next()
            U = U * s

            #trainX = sppy.csarray(trainX)
            #testX = sppy.csarray(testX)

            U = numpy.ascontiguousarray(U)
            V = numpy.ascontiguousarray(V)
        else:
            learner.learnModel(trainX)
            U = learner.U
            V = learner.V
        learnTime = time.time() - start
        metaData.append(learnTime)

        logging.debug("Getting all omega")
        allOmegaPtr = SparseUtils.getOmegaListPtr(X)
        logging.debug("Getting train omega")
        trainOmegaPtr = SparseUtils.getOmegaListPtr(trainX)
        logging.debug("Getting test omega")
        testOmegaPtr = SparseUtils.getOmegaListPtr(testX)
        logging.debug("Getting recommendations")

        trainOrderedItems = MCEvaluator.recommendAtk(U, V, maxItems)
        testOrderedItems = MCEvaluatorCython.recommendAtk(U, V, maxItems, trainX)

        colNames = []
        trainMeasures = []
        testMeasures = []

        for p in self.ps:
            trainMeasures.append(MCEvaluator.precisionAtK(trainOmegaPtr, trainOrderedItems, p))
            testMeasures.append(MCEvaluator.precisionAtK(testOmegaPtr, testOrderedItems, p))
            colNames.append("precision@" + str(p))

        for p in self.ps:
            trainMeasures.append(MCEvaluator.recallAtK(trainOmegaPtr, trainOrderedItems, p))
            testMeasures.append(MCEvaluator.recallAtK(testOmegaPtr, testOrderedItems, p))
            colNames.append("recall@" + str(p))

        for p in self.ps:
            trainMeasures.append(MCEvaluator.f1AtK(trainOmegaPtr, trainOrderedItems, p))
            testMeasures.append(MCEvaluator.f1AtK(testOmegaPtr, testOrderedItems, p))
            colNames.append("f1@" + str(p))

        for p in self.ps:
            trainMeasures.append(MCEvaluator.mrrAtK(trainOmegaPtr, trainOrderedItems, p))
            testMeasures.append(MCEvaluator.mrrAtK(testOmegaPtr, testOrderedItems, p))
            colNames.append("mrr@" + str(p))

        try:
            r = SparseUtilsCython.computeR(U, V, w, self.algoArgs.numRecordAucSamples)
            trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, r=r))
            testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, allArray=allOmegaPtr, r=r))

            w = 0.0
            r = SparseUtilsCython.computeR(U, V, w, self.algoArgs.numRecordAucSamples)
            trainMeasures.append(MCEvaluator.localAUCApprox(trainOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, r=r))
            testMeasures.append(MCEvaluator.localAUCApprox(testOmegaPtr, U, V, w, self.algoArgs.numRecordAucSamples, allArray=allOmegaPtr, r=r))

            colNames.append("LAUC@" + str(self.algoArgs.u))
            colNames.append("AUC")
        except:
            logging.debug("Could not compute AUCs")
            raise

        trainMeasures = numpy.array(trainMeasures)
        testMeasures = numpy.array(testMeasures)
        metaData = numpy.array(metaData)

        allTrainMeasures.append(trainMeasures)
        allTestMeasures.append(testMeasures)
        allMetaData.append(metaData)

    allTrainMeasures = numpy.array(allTrainMeasures)
    allTestMeasures = numpy.array(allTestMeasures)
    allMetaData = numpy.array(allMetaData)
    meanTrainMeasures = numpy.mean(allTrainMeasures, 0)
    meanTestMeasures = numpy.mean(allTestMeasures, 0)
    meanMetaData = numpy.mean(allMetaData, 0)

    logging.debug("Mean metrics")
    for i, colName in enumerate(colNames):
        logging.debug(colName + ":" + str("%.4f" % meanTrainMeasures[i]) + "/" + str("%.4f" % meanTestMeasures[i]))

    numpy.savez(fileName, meanTrainMeasures, meanTestMeasures, meanMetaData, trainOrderedItems, testOrderedItems)
    logging.debug("Saved file as " + fileName)
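# Because numpy.savez above is called with positional arguments, the arrays are stored under
# the default keys arr_0 ... arr_4, and ".npz" is appended to fileName if it is missing.
# A minimal sketch of reading the results back (variable names on the left are illustrative):
#
#     data = numpy.load(fileName if fileName.endswith(".npz") else fileName + ".npz")
#     meanTrainMeasures, meanTestMeasures = data["arr_0"], data["arr_1"]
#     meanMetaData = data["arr_2"]
#     trainOrderedItems, testOrderedItems = data["arr_3"], data["arr_4"]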