Example #1
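    # Checks averageRocCurve on a generated sparse binary matrix, first on the
    # full matrix and then on a train/test split produced by shuffleSplitRows.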
    def testAverageRocCurve(self):
        m = 50
        n = 20
        k = 8
        u = 20.0 / m
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                w,
                                                                csarray=True,
                                                                verbose=True,
                                                                indsPerRow=200)

        fpr, tpr = MCEvaluator.averageRocCurve(X, U, V)

        import matplotlib
        matplotlib.use("GTK3Agg")
        import matplotlib.pyplot as plt
        #plt.plot(fpr, tpr)
        #plt.show()

        #Now try the case where we have a training set
        folds = 1
        testSize = 5
        trainTestXs = Sampling.shuffleSplitRows(X, folds, testSize)
        trainX, testX = trainTestXs[0]

        fpr, tpr = MCEvaluator.averageRocCurve(testX, U, V, trainX=trainX)
Example #2
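    # Centers the rows and columns of a sparse matrix, checks that uncenter
    # restores the original entries, and verifies that SoftImpute's RMSE is
    # unchanged when predictions and targets are uncentered together.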
    def testUncentre(self): 
        shape = (50, 10)
        r = 5 
        k = 100 

        X, U, s, V = SparseUtils.generateSparseLowRank(shape, r, k, verbose=True)   
        rowInds, colInds = X.nonzero()  
        
        Y = X.copy()

        inds = X.nonzero()
        X, mu1 = SparseUtils.centerRows(X)
        X, mu2 = SparseUtils.centerCols(X, inds=inds)   
        
        cX = X.copy()
        
        Y2 = SparseUtils.uncenter(X, mu1, mu2)
        
        nptst.assert_array_almost_equal(Y.todense(), Y2.todense(), 3)
        
        #We try softImpute on a centered matrix and check if the results are the same 
        lmbdas = numpy.array([0.1])
        softImpute = SoftImpute(lmbdas)
        
        Z = softImpute.learnModel(cX, fullMatrices=False)
        Z = softImpute.predict([Z], cX.nonzero())[0]
        
        error1 = MCEvaluator.rootMeanSqError(cX, Z)
        
        X = SparseUtils.uncenter(cX, mu1, mu2)
        Z2 = SparseUtils.uncenter(Z, mu1, mu2)
        
        error2 = MCEvaluator.rootMeanSqError(X, Z2)
        
        self.assertAlmostEquals(error1, error2)
Example #3
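    # With lmbda=0 the training entries should be recovered exactly; with a
    # moderate lmbda the predictions should equal (U*s).dot(V.T) at the
    # requested indices.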
    def testPredict(self):
        #Create a set of indices
        lmbda = 0.0

        iterativeSoftImpute = IterativeSoftImpute(lmbda, k=10)
        matrixIterator = iter(self.matrixList)
        ZList = iterativeSoftImpute.learnModel(matrixIterator)

        XhatList = iterativeSoftImpute.predict(ZList, self.indsList)

        #Check we get the exact matrices returned
        for i, Xhat in enumerate(XhatList):
            nptst.assert_array_almost_equal(numpy.array(Xhat.todense()), self.matrixList[i].todense())

            self.assertEquals(Xhat.nnz, self.indsList[i].shape[0])

            self.assertAlmostEquals(MCEvaluator.meanSqError(Xhat, self.matrixList[i]), 0)
            self.assertAlmostEquals(MCEvaluator.rootMeanSqError(Xhat, self.matrixList[i]), 0)

        #Try moderate lambda
        lmbda = 0.1
        iterativeSoftImpute = IterativeSoftImpute(lmbda, k=10)
        matrixIterator = iter(self.matrixList)
        ZList = list(iterativeSoftImpute.learnModel(matrixIterator))

        XhatList = iterativeSoftImpute.predict(iter(ZList), self.indsList)

        for i, Xhat in enumerate(XhatList):
            for ind in self.indsList[i]:
                U, s, V = ZList[i]
                Z = (U*s).dot(V.T)
                self.assertEquals(Xhat[numpy.unravel_index(ind, Xhat.shape)], Z[numpy.unravel_index(ind, Xhat.shape)])

            self.assertEquals(Xhat.nnz, self.indsList[i].shape[0])
Example #4
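    # The sampled localAUCApprox should converge to the exact localAUC as the
    # number of AUC samples grows, both for w=1.0 and for w=0.5.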
    def testLocalAucApprox(self):
        m = 100
        n = 200
        k = 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True, verbose=True)

        w = 1.0
        localAuc = MCEvaluator.localAUC(X, U, V, w)

        samples = numpy.arange(150, 200, 10)

        for i, sampleSize in enumerate(samples):
            numAucSamples = sampleSize
            localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
            self.assertAlmostEqual(localAuc2, localAuc, 1)

        # Try smaller w
        w = 0.5
        localAuc = MCEvaluator.localAUC(X, U, V, w)

        samples = numpy.arange(50, 200, 10)

        for i, sampleSize in enumerate(samples):
            numAucSamples = sampleSize
            localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)

            self.assertAlmostEqual(localAuc2, localAuc, 1)
Example #5
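    # With u=0 the local AUC reduces to the standard AUC, so it should match
    # sklearn's roc_auc_score computed per row and averaged.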
    def testLocalAUC(self):
        m = 10
        n = 20
        k = 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, 0.5, verbose=True, csarray=True)

        Z = U.dot(V.T)

        localAuc = numpy.zeros(m)

        for i in range(m):
            localAuc[i] = sklearn.metrics.roc_auc_score(numpy.ravel(X[i, :].toarray()), Z[i, :])

        localAuc = localAuc.mean()

        u = 0.0
        localAuc2 = MCEvaluator.localAUC(X, U, V, u)

        self.assertEquals(localAuc, localAuc2)

        # Now try a large r
        w = 1.0

        localAuc2 = MCEvaluator.localAUC(X, U, V, w)
        self.assertEquals(localAuc2, 0)
Example #6
    def testWeightedLearning(self): 
        #See if the weighted learning has any effect 
        shape = (20, 20) 
        r = 20 
        numInds = 100
        noise = 0.2
        X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)
        
        rho = 0.0
        iterativeSoftImpute = IterativeSoftImpute(rho, k=10, weighted=True)
        iterX = iter([X])
        resultIter = iterativeSoftImpute.learnModel(iterX)
        Z = next(resultIter)
        
        iterativeSoftImpute = IterativeSoftImpute(rho, k=10, weighted=False)
        iterX = iter([X])
        resultIter = iterativeSoftImpute.learnModel(iterX)
        Z2 = next(resultIter)
        
        #Check results when rho=0
        nptst.assert_array_almost_equal((Z[0]*Z[1]).dot(Z[2].T), (Z2[0]*Z2[1]).dot(Z2[2].T)) 
        nptst.assert_array_almost_equal(Z[1], Z2[1]) 
        
        #Then check non-uniform matrix - entries clustered around middle indices 
        shape = (20, 15) 
        numInds = 200  
        maxInd = (shape[0]*shape[1]-1)
        nzInds = numpy.array(numpy.random.randn(numInds)*maxInd/4 + maxInd/2, int)
        trainInds = nzInds[0:int(nzInds.shape[0]/2)]
        testInds = nzInds[int(nzInds.shape[0]/2):] 
        trainInds = numpy.unique(numpy.clip(trainInds, 0, maxInd)) 
        testInds = numpy.unique(numpy.clip(testInds, 0, maxInd)) 

        trainX = ExpSU.SparseUtils.generateSparseLowRank(shape, r, trainInds, noise)
        testX = ExpSU.SparseUtils.generateSparseLowRank(shape, r, testInds, noise)
        
        #Error using weighted soft impute 
        #print("Running weighted soft impute")
        rho = 0.5
        iterativeSoftImpute = IterativeSoftImpute(rho, k=10, weighted=True)
        iterX = iter([trainX])
        resultIter = iterativeSoftImpute.learnModel(iterX)
        
        Z = next(resultIter)
        iterTestX = iter([testX])
        predX = iterativeSoftImpute.predictOne(Z, testX.nonzero())
        
        error = MCEvaluator.rootMeanSqError(testX, predX)
        #print(error)
        
        iterativeSoftImpute = IterativeSoftImpute(rho, k=10, weighted=False)
        iterX = iter([trainX])
        resultIter = iterativeSoftImpute.learnModel(iterX)
        
        Z = next(resultIter)
        iterTestX = iter([testX])
        predX = iterativeSoftImpute.predictOne(Z, testX.nonzero())
        
        error = MCEvaluator.rootMeanSqError(testX, predX)
Example #7
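# Trains the given maxLocalAuc learner (warm-started with U and V) and returns
# the averaged ROC curves on the training and test matrices.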
def computeTestAuc(args): 
    trainX, testX, maxLocalAuc, U, V  = args 
    numpy.random.seed(21)
    logging.debug(maxLocalAuc)
    
    #maxLocalAuc.learningRateSelect(trainX)
    U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(trainX, U=U, V=V, verbose=True)
    
    fprTrain, tprTrain = MCEvaluator.averageRocCurve(trainX, U, V)
    fprTest, tprTest = MCEvaluator.averageRocCurve(testX, U, V)
        
    return fprTrain, tprTrain, fprTest, tprTest
Example #8
def learnPredictRanking(args):
    """
    A function to train on a training set and test on a test set, for a number 
    of values of rho. 
    """
    learner, trainX, testX, rhos = args
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner)

    testInds = testX.nonzero()
    trainXIter = []
    testIndList = []

    for rho in rhos:
        trainXIter.append(trainX)
        testIndList.append(testInds)

    trainXIter = iter(trainXIter)

    ZIter = learner.learnModel(trainXIter, iter(rhos))

    metrics = numpy.zeros(rhos.shape[0])

    for j, Z in enumerate(ZIter):
        U, s, V = Z
        U = U * s
        U = numpy.ascontiguousarray(U)
        V = numpy.ascontiguousarray(V)

        testOrderedItems = MCEvaluatorCython.recommendAtk(
            U, V, learner.recommendSize, trainX)

        if learner.metric == "mrr":
            metrics[j] = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX),
                                            testOrderedItems,
                                            learner.recommendSize)
            logging.debug("MRR@" + str(learner.recommendSize) + ": " +
                          str('%.4f' % metrics[j]) + " " + str(learner))
        elif learner.metric == "f1":
            metrics[j] = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX),
                                           testOrderedItems,
                                           learner.recommendSize)
            logging.debug("F1@" + str(learner.recommendSize) + ": " +
                          str('%.4f' % metrics[j]) + " " + str(learner))
        else:
            raise ValueError("Unknown metric " + learner.metric)

        gc.collect()

    return metrics
Example #9
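    # Compares modelSelect's cross-validated errors against a manual sweep of
    # SoftImpute over the same rhos and CV folds.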
    def testModelSelect(self):
        lmbda = 0.1
        shape = (20, 20) 
        r = 20 
        numInds = 100
        noise = 0.2
        X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)
        
        U, s, V = numpy.linalg.svd(X.todense())

        k = 15

        iterativeSoftImpute = IterativeSoftImpute(lmbda, k=None, svdAlg="propack", updateAlg="zero")
        iterativeSoftImpute.numProcesses = 1
        rhos = numpy.linspace(0.5, 0.001, 20)
        ks = numpy.array([k], int)
        folds = 3
        cvInds = Sampling.randCrossValidation(folds, X.nnz)
        meanTestErrors, meanTrainErrors = iterativeSoftImpute.modelSelect(X, rhos, ks, cvInds)

        #Now do model selection manually 
        (rowInds, colInds) = X.nonzero()
        trainErrors = numpy.zeros((rhos.shape[0], len(cvInds)))
        testErrors = numpy.zeros((rhos.shape[0], len(cvInds)))
        
        for i, rho in enumerate(rhos): 
            for j, (trainInds, testInds) in enumerate(cvInds): 
                trainX = scipy.sparse.csc_matrix(X.shape)
                testX = scipy.sparse.csc_matrix(X.shape)
                
                for p in trainInds: 
                    trainX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]
                    
                for p in testInds: 
                    testX[rowInds[p], colInds[p]] = X[rowInds[p], colInds[p]]
                                 
                softImpute = SoftImpute(numpy.array([rho]), k=ks[0]) 
                ZList = [softImpute.learnModel(trainX, fullMatrices=False)]
                
                predTrainX = softImpute.predict(ZList, trainX.nonzero())[0]
                predX = softImpute.predict(ZList, testX.nonzero())[0]

                testErrors[i, j] = MCEvaluator.rootMeanSqError(testX, predX)
                trainErrors[i, j] = MCEvaluator.rootMeanSqError(trainX, predTrainX)
        
        meanTestErrors2 = testErrors.mean(1)   
        meanTrainErrors2 = trainErrors.mean(1)  
        
        nptst.assert_array_almost_equal(meanTestErrors.ravel(), meanTestErrors2, 1) 
Example #10
    def learnPredict(self, trainX, testX, k, lmbda, gamma, maxNTry=1):
        """
        A function to train on a training set and test on a test set.
        Use a copy of the base learner (allow to run several parameter sets in
        parallel) 
        """
        logging.debug("k = " + str(k) + "    lmbda = " + str(lmbda) + "    gamma = " +str(gamma))
        learner = self.baseLearner.copy()
        learner.k = k
        learner.lmbda = lmbda
        learner.gamma = gamma
        
        testInds = testX.nonzero()
    
        # train (retry several times if a floating point error is raised)
        haveRes = False
        nTry = 0
        while not haveRes and nTry<maxNTry:
            nTry += 1
            try:
                ZIter = learner.learnModel(trainX, storeAll = False)
                haveRes = True
            except (FloatingPointError, ValueError, SGDNorm2Reg.ArithmeticError):
                pass

        if haveRes:
            logging.debug("result obtained in " + str(nTry) + " try(ies)")
            predX = learner.predict(ZIter, testX.nonzero())
            error = MCEvaluator.rootMeanSqError(testX, predX)
        else:
            logging.debug("enable to make SGD converge")
            error = float("inf")
            
        return error
Example #11
    def testAverageAuc(self):
        m = 50
        n = 20
        k = 8
        u = 20.0 / m
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
            (m, n), k, w, csarray=True, verbose=True, indsPerRow=200
        )

        auc = MCEvaluator.averageAuc(X, U, V)

        u = 0.0
        auc2 = MCEvaluator.localAUC(X, U, V, u)

        self.assertAlmostEquals(auc, auc2)
Example #12
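    # meanSqError should be zero for identical matrices and, for these dense
    # random matrices, equal to the elementwise mean squared difference.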
    def testMeanSqError(self):
        numExamples = 10
        testX = scipy.sparse.rand(numExamples, numExamples)
        testX = testX.tocsr()

        predX = testX.copy()
        error = MCEvaluator.meanSqError(testX, predX)
        self.assertEquals(error, 0.0)

        testX = numpy.random.rand(numExamples, numExamples)
        predX = testX + numpy.random.rand(numExamples, numExamples) * 0.5

        error2 = ((testX - predX) ** 2).sum() / (numExamples ** 2)
        error = MCEvaluator.meanSqError(scipy.sparse.csr_matrix(testX), scipy.sparse.csr_matrix(predX))

        self.assertEquals(error, error2)
Example #13
def learnPredictMSE(args): 
    """
    A function to train on a training set and test on a test set, for a number 
    of values of rho. 
    """
    learner, trainX, testX, rhos = args 
    logging.debug("k=" + str(learner.getK()))
    logging.debug(learner) 
    
    testInds = testX.nonzero()
    trainXIter = []
    testIndList = []    
    
    for rho in rhos: 
        trainXIter.append(trainX)
        testIndList.append(testInds)
    
    trainXIter = iter(trainXIter)

    ZIter = learner.learnModel(trainXIter, iter(rhos))
    predXIter = learner.predict(ZIter, testIndList)
    
    errors = numpy.zeros(rhos.shape[0])
    for j, predX in enumerate(predXIter): 
        errors[j] = MCEvaluator.rootMeanSqError(testX, predX)
        logging.debug("Error = " + str(errors[j]))
        del predX 
        gc.collect()
        
    return errors 
Example #14
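    # f1AtK should equal the harmonic mean of precisionAtK and recallAtK,
    # including rows where both precision and recall are zero.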
    def testF1Atk(self):
        m = 10
        n = 5
        r = 3
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)

        import sppy

        X = sppy.csarray(X)
        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)

        self.assertAlmostEquals(
            MCEvaluator.f1AtK(X, orderedItems, n, verbose=False), 2 * r / float(n) / (1 + r / float(n))
        )

        m = 20
        n = 50
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
        k = 5

        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)
        f1s = numpy.zeros(m)

        for i in range(m):
            f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)

        # Test case where we get a zero precision or recall
        orderedItems[5, :] = -1
        precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)

        f1s = numpy.zeros(m)

        for i in range(m):
            if precision[i] + recall[i] != 0:
                f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)
Example #15
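    # recommendAtk should agree with a manual argmax over U.dot(V.T), and items
    # listed in omegaList (training indices) must not appear in the output.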
    def testRecommendAtk(self):
        m = 20
        n = 50
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)

        k = 10
        orderedItems, scores = MCEvaluator.recommendAtk(U, V, k, verbose=True)

        #Now do it manually
        Z = U.dot(V.T)

        orderedItems2 = Util.argmaxN(Z, k)
        scores2 = numpy.fliplr(numpy.sort(Z, 1))[:, 0:k]

        nptst.assert_array_equal(orderedItems, orderedItems2)
        nptst.assert_array_equal(scores, scores2)

        #Test case where we have a set of training indices to remove
        #Let's create a random omegaList
        omegaList = []
        for i in range(m):
            omegaList.append(numpy.random.permutation(n)[0:5])

        orderedItems = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k)

        #print(omegaList)
        #print(orderedItems)
        #print(orderedItems2)

        for i in range(m):
            items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
            self.assertEquals(items.shape[0], 0)

            items = numpy.union1d(omegaList[i], orderedItems[i, :])
            items = numpy.intersect1d(items, orderedItems2[i, :])
            nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))
Example #16
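    # Compares the Cython recommendAtk against the Python version when training
    # indices are excluded, and checks they agree exactly for an all-zero X.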
    def testRecommendAtk(self):
        m = 20
        n = 50
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)

        k = 10

        X = numpy.zeros(X.shape)
        omegaList = []
        for i in range(m):
            omegaList.append(numpy.random.permutation(n)[0:5])
            X[i, omegaList[i]] = 1

        X = sppy.csarray(X)

        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)

        nptst.assert_array_equal(orderedItems[orderedItems2 != -1],
                                 orderedItems2[orderedItems2 != -1])

        for i in range(m):
            items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
            self.assertEquals(items.shape[0], 0)

            #items = numpy.union1d(omegaList[i], orderedItems[i, :])
            #items = numpy.intersect1d(items, orderedItems2[i, :])
            #nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))

        #Now let's have an all zeros X
        X = sppy.csarray(X.shape)
        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k)

        nptst.assert_array_equal(orderedItems, orderedItems2)
Example #17
def computePrecision(args): 
    trainX, testX, testOmegaList, learner  = args 
    
    (m, n) = trainX.shape
                                             
    learner.learnModel(trainX)
    maxItems = 20
    orderedItems = learner.predict(maxItems)
    #print(orderedItems)
    precision = MCEvaluator.precisionAtK(testX, orderedItems, maxItems)
        
    logging.debug("Precision@" + str(maxItems) + ": " + str(precision) + " with k = " + str(learner.k))
        
    return precision
Example #18
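# Computes the approximate local AUC on the test set for each lambda in
# learner.lmbdas, refitting the model for every value.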
def localAucsLmbdas(args): 
    trainX, testX, testOmegaList, learner  = args 
    
    (m, n) = trainX.shape
                        
    localAucs = numpy.zeros(learner.lmbdas.shape[0])

    for j, lmbda in enumerate(learner.lmbdas): 
        learner.lmbda = lmbda 
        
        U, V = learner.learnModel(trainX)
        
        r = SparseUtilsCython.computeR(U, V, 1-learner.u, learner.numAucSamples)
        localAucs[j] = MCEvaluator.localAUCApprox(testX, U, V, testOmegaList, learner.numAucSamples, r) 
        logging.debug("Local AUC: " + str(localAucs[j]) + " with k = " + str(learner.k) + " and lmbda= " + str(learner.lmbda))
        
    return localAucs
Example #19
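    # precisionAtK over all n items equals the density of X; for smaller k it
    # should match a manual computation from the returned score indices.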
    def testPrecisionAtK(self):
        m = 10
        n = 5
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)

        #print(MCEvaluator.precisionAtK(X, U*s, V, 2))

        orderedItems = MCEvaluator.recommendAtk(U, V, n)
        self.assertAlmostEquals(MCEvaluator.precisionAtK(X, orderedItems, n),
                                X.nnz / float(m * n))

        k = 2
        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X,
                                                        orderedItems,
                                                        k,
                                                        verbose=True)

        precisions = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            precisions[i] = numpy.intersect1d(scoreInds[i, :],
                                              nonzeroRow).shape[0] / float(k)

        self.assertEquals(precision.mean(), precisions.mean())

        #Now try random U and V
        U = numpy.random.rand(m, 3)
        V = numpy.random.rand(n, 3)

        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X,
                                                        orderedItems,
                                                        k,
                                                        verbose=True)

        precisions = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            precisions[i] = numpy.intersect1d(scoreInds[i, :],
                                              nonzeroRow).shape[0] / float(k)

        self.assertEquals(precision.mean(), precisions.mean())
Example #20
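    # recallAtK over all n items should be 1.0; for smaller k it should match a
    # manual computation from the returned score indices.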
    def testRecallAtK(self):
        m = 10
        n = 5
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)

        orderedItems = MCEvaluator.recommendAtk(U, V, n)
        self.assertAlmostEquals(MCEvaluator.recallAtK(X, orderedItems, n), 1.0)

        k = 2
        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        recall, scoreInds = MCEvaluator.recallAtK(X,
                                                  orderedItems,
                                                  k,
                                                  verbose=True)

        recalls = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            recalls[i] = numpy.intersect1d(scoreInds[i, :],
                                           nonzeroRow).shape[0] / float(
                                               nonzeroRow.shape[0])

        self.assertEquals(recall.mean(), recalls.mean())

        #Now try random U and V
        U = numpy.random.rand(m, 3)
        V = numpy.random.rand(n, 3)

        orderedItems = MCEvaluator.recommendAtk(U, V, k)
        recall, scoreInds = MCEvaluator.recallAtK(X,
                                                  orderedItems,
                                                  k,
                                                  verbose=True)

        recalls = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            recalls[i] = numpy.intersect1d(scoreInds[i, :],
                                           nonzeroRow).shape[0] / float(
                                               nonzeroRow.shape[0])

        self.assertEquals(recall.mean(), recalls.mean())
Example #21
    def testPredict(self): 
        k = 5
        lmbda = 0.01
        eps = 0.000001         
        tmax = 1000        
        
        learner = IterativeSGDNorm2Reg(k, lmbda, eps, tmax)
        
        ZList = learner.learnModel(iter(self.matrixList)) 
        
        indList = []
        for X in self.matrixList: 
            indList.append(X.nonzero())

        XList = learner.predict(ZList, indList)    

        for i, Xhat in enumerate(XList): 
            #print(Xhat)
            print(MCEvaluator.rootMeanSqError(Xhat, self.matrixList[i]))
Example #22
def computeTestMRR(args):
    """
    A simple function for outputing F1 for a learner in conjunction e.g. with 
    parallel model selection. 
    """
    trainX, testX, learner = args

    learner.learnModel(trainX)

    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V, learner.recommendSize, trainX)
    mrr = MCEvaluator.mrrAtK(SparseUtils.getOmegaListPtr(testX), testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except:
        learnerStr = str(learner)

    logging.debug("MRR@" + str(learner.recommendSize) + ": " + str("%.4f" % mrr) + " " + learnerStr)

    return mrr
Example #23
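    # Trains BPR with a uniform user/item sampler and prints the average AUC,
    # then enables user and positive-item regularisation.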
    def testTrain(self): 
        args = BPRArgs()   
        args.learning_rate = 0.1
        k = 5
        
        learner = BPR(k, args)    
        
        maxIterations = 10
        sample_negative_items_empirically = True
        sampler = UniformUserUniformItem()

        user_factors, item_factors = learner.train(self.X, sampler, maxIterations)
        print(MCEvaluator.averageAuc(self.X, user_factors, item_factors))
        
        #Let's try regularisation 
        args.user_regularization = 1
        learner.train(self.X, sampler, maxIterations)
        
        #Now try positive item regularisation
        args.positive_item_regularization = 1
Example #24
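    # With identical train and test folds the standard deviation of the test
    # errors should be ~0, and modelSelect's mean errors should match a manual
    # sweep over ks and rhos.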
    def testModelSelect2(self): 
        rho = 0.1
        shape = (20, 20) 
        r = 20 
        numInds = 100
        noise = 0.2
        X = ExpSU.SparseUtils.generateSparseLowRank(shape, r, numInds, noise)
        X = X.tocsc()
        
        U, s, V = numpy.linalg.svd(X.todense())

        k = 15

        iterativeSoftImpute = IterativeSoftImpute(rho, k=None, svdAlg="propack", updateAlg="initial")
        rhos = numpy.linspace(0.5, 0.001, 5)
        ks = numpy.array([5, 10, 15], int)
        folds = 3
        
        cvInds = [] 
        for i in range(folds): 
            cvInds.append((numpy.arange(X.nnz), numpy.arange(X.nnz)))
        
        meanTestErrors, stdTestErrors = iterativeSoftImpute.modelSelect(X, rhos, ks, cvInds)
       
        self.assertAlmostEquals(numpy.linalg.norm(stdTestErrors), 0, 3)
        
        meanTestErrors2 = numpy.zeros((rhos.shape[0], ks.shape[0]))        
        
        #Now compute errors manually 
        for j, k in enumerate(ks): 
            iterativeSoftImpute.setK(k)
            for i, rho in enumerate(rhos): 
                iterativeSoftImpute.setRho(rho)
                ZIter = iterativeSoftImpute.learnModel(iter([X]))
                indList = [X.nonzero()]
                outIterator = iterativeSoftImpute.predict(ZIter, indList)
                Xhat = next(outIterator)
    
                meanTestErrors2[i, j] = MCEvaluator.rootMeanSqError(X, Xhat)

        nptst.assert_array_almost_equal(meanTestErrors, meanTestErrors2, 2)
Example #25
    def testLearnModel(self):
        numpy.random.seed(21)
        X = scipy.sparse.rand(10, 10, 0.5)
        X = X.tocsr()

        method = "lsnmf"

        nimfaFactorise = NimfaFactorise(method, maxIter=50)
        predX = nimfaFactorise.learnModel(X)

        self.assertEquals(predX.shape, X.shape)

        #Test the case where we specify many ranks
        ranks = numpy.array([10, 8, 5, 2])
        nimfaFactorise = NimfaFactorise(method, ranks)
        predXList = nimfaFactorise.learnModel(X)

        #Let's look at the errors
        for predX in predXList:
            error = MCEvaluator.meanSqError(X, predX)
            print(error)
Example #26
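    # Checks the convergence of localAUCApprox for w=0.5 at increasing sample
    # sizes, and more precisely at sampleSize=1000.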
    def testLocalAucApprox2(self):
        m = 100
        n = 200
        k = 5
        numInds = 100
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                csarray=True,
                                                                verbose=True)

        r = numpy.ones(m) * -10

        w = 0.5
        localAuc = MCEvaluator.localAUC(X, U, V, w)

        samples = numpy.arange(50, 200, 10)

        for i, sampleSize in enumerate(samples):
            localAuc2 = MCEvaluator.localAUCApprox(
                SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)

            self.assertAlmostEqual(localAuc2, localAuc, 1)

        #Test more accurately
        sampleSize = 1000
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X),
                                               U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 2)

        #Now set a high r
        Z = U.dot(V.T)
        localAuc = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X),
                                              U, V, w, sampleSize)

        for i, sampleSize in enumerate(samples):
            localAuc2 = MCEvaluator.localAUCApprox(
                SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)

            self.assertAlmostEqual(localAuc2, localAuc, 1)

        #Test more accurately
        sampleSize = 1000
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X),
                                               U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 2)
Example #27
def computeTestF1(args):
    """
    A simple function for outputing F1 for a learner in conjunction e.g. with 
    parallel model selection. 
    """
    trainX, testX, learner = args

    learner.learnModel(trainX)

    testOrderedItems = MCEvaluatorCython.recommendAtk(learner.U, learner.V,
                                                      learner.recommendSize,
                                                      trainX)
    f1 = MCEvaluator.f1AtK(SparseUtils.getOmegaListPtr(testX),
                           testOrderedItems, learner.recommendSize)

    try:
        learnerStr = learner.modelParamsStr()
    except:
        learnerStr = str(learner)

    logging.debug("F1@" + str(learner.recommendSize) + ": " +
                  str('%.4f' % f1) + " " + learnerStr)

    return f1
Example #28
    def predict(self, maxItems):
        return MCEvaluator.recommendAtk(self.U, self.V, maxItems)
Example #29
    def recordResults(self, ZIter, learner, fileName):
        """
        Save results for a particular recommendation 
        """
        trainIterator = self.getTrainIterator()
        testIterator = self.testXIteratorFunc()
        measures = []
        metadata = []
        vectorMetaData = []
        logging.debug("Computing recommendation errors")
        
        while True: 
            try: 
                start = time.time()
                Z = next(ZIter) 
                learnTime = time.time()-start 
            except StopIteration: 
                break 
                
            if self.algoArgs.verbose: 
                print(learner.measures) 
                vectorMetaData.append(learner.measures) 
            
            trainX = next(trainIterator)
            if not self.algoArgs.trainError: 
                del trainX 
                gc.collect()
            
            testX = next(testIterator)
            predTestX = learner.predictOne(Z, testX.nonzero())
            predTestX.eliminate_zeros()
            predTestX = trainIterator.uncenter(predTestX)
            currentMeasures = [MCEvaluator.rootMeanSqError(testX, predTestX), MCEvaluator.meanAbsError(testX, predTestX)]
            
            if self.algoArgs.trainError:
                assert trainX.shape == testX.shape
                predTrainX = learner.predictOne(Z, trainX.nonzero())  
                predTrainX.eliminate_zeros()
                predTrainX = trainIterator.uncenter(predTrainX)
                trainX.eliminate_zeros()
                trainX = trainIterator.uncenter(trainX)
                currentMeasures.append(MCEvaluator.rootMeanSqError(trainX, predTrainX))
                del trainX 
                gc.collect()
            
            logging.debug("Error measures: " + str(currentMeasures))
            logging.debug("Standard deviation of test set " + str(testX.data.std()))
            measures.append(currentMeasures)
            
            #Store some metadata about the learning process 
            if type(learner) == IterativeSoftImpute: 
                metadata.append([Z[0].shape[1], learner.getRho(), learnTime])
            elif type(learner) == IterativeSGDNorm2Reg: 
                metadata.append([Z[0][0].shape[1], learner.getLambda(), learnTime])

        measures = numpy.array(measures)
        metadata = numpy.array(metadata)
        vectorMetaData = numpy.array(vectorMetaData)
        
        logging.debug(measures)
        numpy.savez(fileName, measures, metadata, vectorMetaData)
        logging.debug("Saved file as " + fileName)
Example #30
    def run():
        for i in range(numRuns):
            MCEvaluator.localAUCApprox(X, U, V, omegaList, numAucSamples, r)
Example #31
    def recordResults(self, muU, muV, trainMeasures, testMeasures, loopInd,
                      rowSamples, indPtr, colInds, testIndPtr, testColInds,
                      allIndPtr, allColInds, gi, gp, gq, trainX, startTime):

        sigmaU = self.getSigma(loopInd, self.alpha, muU.shape[0])
        sigmaV = self.getSigma(loopInd, self.alpha, muU.shape[0])
        r = SparseUtilsCython.computeR(muU, muV, self.w,
                                       self.numRecordAucSamples)
        objArr = self.objectiveApprox((indPtr, colInds),
                                      muU,
                                      muV,
                                      r,
                                      gi,
                                      gp,
                                      gq,
                                      full=True)
        if trainMeasures is None:
            trainMeasures = []
        trainMeasures.append([
            objArr.sum(),
            MCEvaluator.localAUCApprox((indPtr, colInds), muU, muV, self.w,
                                       self.numRecordAucSamples, r),
            time.time() - startTime, loopInd
        ])

        printStr = "iter " + str(loopInd) + ":"
        printStr += " sigmaU=" + str('%.4f' % sigmaU)
        printStr += " sigmaV=" + str('%.4f' % sigmaV)
        printStr += " train: obj~" + str('%.4f' % trainMeasures[-1][0])
        printStr += " LAUC~" + str('%.4f' % trainMeasures[-1][1])

        if testIndPtr is not None:
            testMeasuresRow = []
            testMeasuresRow.append(
                self.objectiveApprox((testIndPtr, testColInds),
                                     muU,
                                     muV,
                                     r,
                                     gi,
                                     gp,
                                     gq,
                                     allArray=(allIndPtr, allColInds)))
            testMeasuresRow.append(
                MCEvaluator.localAUCApprox((testIndPtr, testColInds),
                                           muU,
                                           muV,
                                           self.w,
                                           self.numRecordAucSamples,
                                           r,
                                           allArray=(allIndPtr, allColInds)))
            testOrderedItems = MCEvaluatorCython.recommendAtk(
                muU, muV, numpy.max(self.recommendSize), trainX)

            printStr += " validation: obj~" + str('%.4f' % testMeasuresRow[0])
            printStr += " LAUC~" + str('%.4f' % testMeasuresRow[1])

            try:
                for p in self.recommendSize:
                    f1Array, orderedItems = MCEvaluator.f1AtK(
                        (testIndPtr, testColInds),
                        testOrderedItems,
                        p,
                        verbose=True)
                    testMeasuresRow.append(f1Array[rowSamples].mean())
            except:
                f1Array, orderedItems = MCEvaluator.f1AtK(
                    (testIndPtr, testColInds),
                    testOrderedItems,
                    self.recommendSize,
                    verbose=True)
                testMeasuresRow.append(f1Array[rowSamples].mean())

            printStr += " f1@" + str(self.recommendSize) + "=" + str(
                '%.4f' % testMeasuresRow[-1])

            try:
                for p in self.recommendSize:
                    mrr, orderedItems = MCEvaluator.mrrAtK(
                        (testIndPtr, testColInds),
                        testOrderedItems,
                        p,
                        verbose=True)
                    testMeasuresRow.append(mrr[rowSamples].mean())
            except TypeError:
                #As above, fall back to a single scalar cutoff
                mrr, orderedItems = MCEvaluator.mrrAtK(
                    (testIndPtr, testColInds),
                    testOrderedItems,
                    self.recommendSize,
                    verbose=True)
                testMeasuresRow.append(mrr[rowSamples].mean())

            printStr += " mrr@" + str(self.recommendSize) + "=" + str(
                '%.4f' % testMeasuresRow[-1])
            testMeasures.append(testMeasuresRow)

        printStr += " ||U||=" + str('%.3f' % numpy.linalg.norm(muU))
        printStr += " ||V||=" + str('%.3f' % numpy.linalg.norm(muV))

        if self.bound:
            trainObj = objArr.sum()

            expectationBound = self.computeBound(trainX, muU, muV, trainObj,
                                                 self.delta)
            printStr += " bound=" + str('%.3f' % expectationBound)
            trainMeasures[-1].append(expectationBound)

        return printStr
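The try/except blocks above exist because self.recommendSize may be either a single integer cutoff or an array of cutoffs: iterating over a scalar raises TypeError, which triggers the scalar fallback. A minimal sketch of the same dispatch done once up front, using a hypothetical cutoffsAsArray helper (not part of MCEvaluator):

import numpy

def cutoffsAsArray(recommendSize):
    #Hypothetical helper: normalise a scalar or array-like of cutoffs
    #to a 1D numpy array so a single loop handles both cases
    return numpy.atleast_1d(numpy.asarray(recommendSize))

#cutoffsAsArray(5) gives array([5]); cutoffsAsArray([1, 5, 10]) gives array([ 1,  5, 10])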
Example no. 49
    def predict(self, maxItems):
        return MCEvaluator.recommendAtk(self.U, self.V, maxItems)
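For reference, a minimal numpy sketch of the scoring that recommendAtk presumably performs: rank the items for each user by the entries of UV^T and keep the indices of the top maxItems per row. The real method likely also excludes items already seen in training (compare the trainX argument of MCEvaluatorCython.recommendAtk above); this sketch omits that step.

import numpy

def recommendAtkSketch(U, V, maxItems):
    #Score every (user, item) pair via the low-rank model Z = U V^T,
    #then take the maxItems highest-scoring item indices per row
    Z = U.dot(V.T)
    return numpy.fliplr(numpy.argsort(Z, axis=1))[:, :maxItems]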
Example no. 51
                trainX = X
                
                if modelSelect: 
                    modelSelectX, userInds = Sampling.sampleUsers2(trainX, modelSelectSamples)
                    meanMetrics, stdMetrics = learner.modelSelect(modelSelectX)
                
                learner.learnModel(X)
                U = learner.U 
                V = learner.V 
            
            if not isinstance(learner, CosineKNNRecommender):
                U = numpy.ascontiguousarray(U)
                V = numpy.ascontiguousarray(V)
                
                #Note that we compute UU^T for recommendations 
                orderedItems, scores = MCEvaluator.recommendAtk(U, U, maxItems, verbose=True)
                orderedItems2, scores2 = MCEvaluator.recommendAtk(U.dot(V.T.dot(V)), U, maxItems, verbose=True)
            else: 
                orderedItems2 = orderedItems 
                scores2 = scores 
            
            #Normalise scores 
            scores /= numpy.max(scores)                
            
            meanStatsContacts, meanStatsInterests = saveResults(orderedItems, scores, dataset, similaritiesFileName, contactsFilename, interestsFilename, minScore, minContacts, minAcceptableSims)
            meanStatsContacts2, meanStatsInterests2 = saveResults(orderedItems2, scores2, dataset, similaritiesFileName, contactsFilename, interestsFilename, minScore, minContacts, minAcceptableSims)

            numpy.savez(outputFilename, meanStatsContacts, meanStatsInterests, meanStatsContacts2, meanStatsInterests2)
            logging.debug("Saved precisions/recalls on contacts/interests as " + outputFilename)
    
        finally: 
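The fragment above scores contacts in two ways: passing U as both factors to recommendAtk ranks users by the user-user inner products UU^T, while the second call uses U.dot(V.T.dot(V)) as the left factor, so its scores are U(V^T V)U^T, i.e. user similarities weighted through the item factors. A small illustrative sketch of the two score matrices (shapes chosen arbitrarily):

import numpy

numpy.random.seed(21)
m, n, k = 4, 6, 2
U = numpy.random.rand(m, k)
V = numpy.random.rand(n, k)

contactScores = U.dot(U.T)                   #m x m scores, as in recommendAtk(U, U, ...)
weightedScores = U.dot(V.T.dot(V)).dot(U.T)  #m x m scores, as in recommendAtk(U.dot(V.T.dot(V)), U, ...)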
Example no. 52
    def testF1Atk(self):
        m = 10
        n = 5
        r = 3
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)
        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)

        self.assertAlmostEquals(
            MCEvaluator.f1AtK(X, orderedItems, n, verbose=False),
            2 * r / float(n) / (1 + r / float(n)))

        m = 20
        n = 50
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)
        k = 5

        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X,
                                                        orderedItems,
                                                        k,
                                                        verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X,
                                                  orderedItems,
                                                  k,
                                                  verbose=True)
        f1s = numpy.zeros(m)

        for i in range(m):
            f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)

        #Test case where we get a zero precision or recall
        orderedItems[5, :] = -1
        precision, scoreInds = MCEvaluator.precisionAtK(X,
                                                        orderedItems,
                                                        k,
                                                        verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X,
                                                  orderedItems,
                                                  k,
                                                  verbose=True)

        f1s = numpy.zeros(m)

        for i in range(m):
            if precision[i] + recall[i] != 0:
                f1s[i] = 2 * precision[i] * recall[i] / (precision[i] +
                                                         recall[i])

        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)
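The closed form in the first assertion follows from setting k = n: assuming each row of X contains exactly r relevant items (as the assertion implies), recommending all n items gives precision@n = r/n and recall@n = 1, so F1 = 2(r/n)/(r/n + 1) = 2r/n / (1 + r/n). A quick numeric check with the values used in the test:

n, r = 5, 3
precision = r / float(n)  #all n items recommended, r of them relevant
recall = 1.0              #every relevant item is recovered
f1 = 2 * precision * recall / (precision + recall)
assert abs(f1 - 2 * r / float(n) / (1 + r / float(n))) < 1e-12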