Exemplo n.º 1
0
    def testSampleUsers(self):
        """Check Sampling.sampleUsers: when k >= m the matrix is returned
        unchanged, otherwise a consistent subset of user rows is kept."""
        m = 10
        n = 15
        r = 5
        u = 0.3
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, w, csarray=True, verbose=True, indsPerRow=200)

        # k > m, so no users should be dropped
        k = 50
        X2, userInds = Sampling.sampleUsers(X, k)

        nptst.assert_array_equal(X.toarray(), X2.toarray())

        numRuns = 50
        for i in range(numRuns):
            m = numpy.random.randint(10, 100)
            n = numpy.random.randint(10, 100)
            k = numpy.random.randint(10, 100)

            X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, w, csarray=True, verbose=True, indsPerRow=200)

            X2, userInds = Sampling.sampleUsers(X, k)

            # assertEquals is a deprecated alias (removed in Python 3.12)
            self.assertEqual(X2.shape[0], min(k, m))
            # Bug fix: .all() must apply to the elementwise comparison, not to
            # numpy.zeros(...) (whose .all() is always False).
            self.assertTrue((X.dot(X.T) != numpy.zeros((m, m))).all())
            self.assertTrue((X2.toarray() == X.toarray()[userInds, :]).all())
            self.assertEqual(X.toarray()[userInds, :].nonzero()[0].shape[0], X2.nnz)
Exemplo n.º 2
0
    def testLocalAUC(self):
        """localAUC with u=0 must equal the mean per-row AUC computed with
        sklearn; with w=1 the local AUC is defined to be zero."""
        m = 10
        n = 20
        k = 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, 0.5, verbose=True, csarray=True)

        Z = U.dot(V.T)

        localAuc = numpy.zeros(m)

        # Reference: per-row AUC of the reconstructed scores against X
        for i in range(m):
            localAuc[i] = sklearn.metrics.roc_auc_score(numpy.ravel(X[i, :].toarray()), Z[i, :])

        localAuc = localAuc.mean()

        u = 0.0
        localAuc2 = MCEvaluator.localAUC(X, U, V, u)

        # assertEquals is a deprecated alias (removed in Python 3.12)
        self.assertEqual(localAuc, localAuc2)

        # Now try a large r
        w = 1.0

        localAuc2 = MCEvaluator.localAUC(X, U, V, w)
        self.assertEqual(localAuc2, 0)
Exemplo n.º 3
0
    def testAverageRocCurve(self):
        """Exercise MCEvaluator.averageRocCurve with and without a train set."""
        numRows, numCols, rank = 50, 20, 8
        quantile = 1 - 20.0 / numRows
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
            (numRows, numCols), rank, quantile, csarray=True, verbose=True, indsPerRow=200
        )

        fpr, tpr = MCEvaluator.averageRocCurve(X, U, V)

        import matplotlib

        matplotlib.use("GTK3Agg")
        import matplotlib.pyplot as plt

        # plt.plot(fpr, tpr)
        # plt.show()

        # Repeat with a train/test split so training entries are excluded
        folds = 1
        testSize = 5
        trainX, testX = Sampling.shuffleSplitRows(X, folds, testSize)[0]

        fpr, tpr = MCEvaluator.averageRocCurve(testX, U, V, trainX=trainX)
Exemplo n.º 4
0
    def testLocalAUC(self):
        """localAUC with u=0 must equal the mean per-row sklearn AUC; with
        w=1 the local AUC is defined to be zero."""
        m = 10
        n = 20
        k = 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                0.5,
                                                                verbose=True,
                                                                csarray=True)

        Z = U.dot(V.T)

        localAuc = numpy.zeros(m)

        # Reference computation: per-row AUC, then averaged
        for i in range(m):
            localAuc[i] = sklearn.metrics.roc_auc_score(
                numpy.ravel(X[i, :].toarray()), Z[i, :])

        localAuc = localAuc.mean()

        u = 0.0
        localAuc2 = MCEvaluator.localAUC(X, U, V, u)

        # assertEquals is a deprecated alias (removed in Python 3.12)
        self.assertEqual(localAuc, localAuc2)

        #Now try a large r
        w = 1.0

        localAuc2 = MCEvaluator.localAUC(X, U, V, w)
        self.assertEqual(localAuc2, 0)
Exemplo n.º 5
0
    def testOverfit(self):
        """
        See if we can get a zero objective on the hinge loss
        """
        m = 10
        n = 20
        k = 5

        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

        eps = 0.001
        k = 10
        maxLocalAuc = MaxLocalAUC(k, u, eps=eps, stochastic=True)
        maxLocalAuc.rate = "constant"
        maxLocalAuc.maxIterations = 500
        maxLocalAuc.numProcesses = 1
        maxLocalAuc.loss = "hinge"
        maxLocalAuc.validationUsers = 0
        # No regularisation, so the model is free to interpolate the data
        maxLocalAuc.lmbda = 0

        print("Overfit example")
        U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(
            X, verbose=True)

        # assertAlmostEquals is a deprecated alias (removed in Python 3.12)
        self.assertAlmostEqual(trainMeasures[-1, 0], 0, 3)
Exemplo n.º 6
0
    def testParallelLearnModel(self):
        """Smoke test for MaxLocalAUC.parallelLearnModel on synthetic data."""
        numpy.random.seed(21)
        m, n, k = 500, 200, 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        # The matrix above is discarded; the shared synthetic dataset is used
        from wallhack.rankingexp.DatasetUtils import DatasetUtils
        X, U, V = DatasetUtils.syntheticDataset1()

        u = 0.1
        w = 1 - u
        eps = 0.05
        maxLocalAuc = MaxLocalAUC(k, w, alpha=1.0, eps=eps, stochastic=True)
        maxLocalAuc.maxIterations = 3
        maxLocalAuc.recordStep = 1
        maxLocalAuc.rate = "optimal"
        maxLocalAuc.t0 = 2.0
        maxLocalAuc.validationUsers = 0.0
        maxLocalAuc.numProcesses = 4

        # Undo any CPU affinity restriction so all cores may be used
        os.system('taskset -p 0xffffffff %d' % os.getpid())
        print(X.nnz / maxLocalAuc.numAucSamples)
        U, V = maxLocalAuc.parallelLearnModel(X)
Exemplo n.º 7
0
    def testAverageRocCurve(self):
        """averageRocCurve should run with and without a training matrix."""
        numUsers = 50
        numItems = 20
        rank = 8
        w = 1 - 20.0 / numUsers
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
            (numUsers, numItems), rank, w, csarray=True, verbose=True,
            indsPerRow=200)

        fpr, tpr = MCEvaluator.averageRocCurve(X, U, V)

        import matplotlib
        matplotlib.use("GTK3Agg")
        import matplotlib.pyplot as plt
        #plt.plot(fpr, tpr)
        #plt.show()

        # Case with a train/test split: training entries are masked out
        folds, testSize = 1, 5
        trainTestXs = Sampling.shuffleSplitRows(X, folds, testSize)
        trainX, testX = trainTestXs[0]

        fpr, tpr = MCEvaluator.averageRocCurve(testX, U, V, trainX=trainX)
Exemplo n.º 8
0
 def testOverfit(self):
     """
     See if we can get a zero objective on the hinge loss
     """
     m = 10
     n = 20
     k = 5

     u = 0.5
     w = 1-u
     X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

     eps = 0.001
     k = 10
     maxLocalAuc = MaxLocalAUC(k, u, eps=eps, stochastic=True)
     maxLocalAuc.rate = "constant"
     maxLocalAuc.maxIterations = 500
     maxLocalAuc.numProcesses = 1
     maxLocalAuc.loss = "hinge"
     maxLocalAuc.validationUsers = 0
     # No regularisation: the model can interpolate the training data
     maxLocalAuc.lmbda = 0

     print("Overfit example")
     U, V, trainMeasures, testMeasures, iterations, time = maxLocalAuc.learnModel(X, verbose=True)

     # assertAlmostEquals is a deprecated alias (removed in Python 3.12)
     self.assertAlmostEqual(trainMeasures[-1, 0], 0, 3)
Exemplo n.º 9
0
    def testModelSelect(self):
        """Run BprRecommender.modelSelect over a tiny hyperparameter grid."""
        m = 50
        n = 50
        k = 5
        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w)

        # Remove any inherited CPU affinity restriction
        os.system('taskset -p 0xffffffff %d' % os.getpid())

        u = 0.2
        learner = BprRecommender(k, 0.1, 0.01)
        learner.maxIterations = 2
        learner.ks = 2 ** numpy.arange(3, 5)
        learner.lmbdaUsers = 2.0 ** -numpy.arange(1, 3)
        learner.lmbdaPoses = 2.0 ** -numpy.arange(1, 3)
        learner.lmbdaNegs = 2.0 ** -numpy.arange(1, 3)
        learner.gammas = 2.0 ** -numpy.arange(1, 3)
        learner.folds = 2
        learner.numProcesses = 1

        # Column sampling probabilities proportional to row popularity
        colProbs = numpy.array(X.sum(1)).ravel()
        colProbs /= colProbs.sum()
        print(colProbs, colProbs.shape)

        learner.modelSelect(X, colProbs=colProbs)
Exemplo n.º 10
0
 def testModelSelect(self):
     """Model selection for BPR on a small random binary matrix."""
     numRows, numCols, rank = 50, 50, 5
     quantile = 1 - 0.5
     X = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), rank, quantile)

     # Let the process run on every CPU
     os.system('taskset -p 0xffffffff %d' % os.getpid())

     u = 0.2
     lmbda = 0.1
     gamma = 0.01
     learner = BprRecommender(rank, lmbda, gamma)
     learner.maxIterations = 2
     learner.ks = 2 ** numpy.arange(3, 5)
     learner.lmbdaUsers = 2.0 ** -numpy.arange(1, 3)
     learner.lmbdaPoses = 2.0 ** -numpy.arange(1, 3)
     learner.lmbdaNegs = 2.0 ** -numpy.arange(1, 3)
     learner.gammas = 2.0 ** -numpy.arange(1, 3)
     learner.folds = 2
     learner.numProcesses = 1

     # Sampling probabilities proportional to per-row popularity
     colProbs = numpy.array(X.sum(1)).ravel()
     colProbs /= colProbs.sum()
     print(colProbs, colProbs.shape)

     learner.modelSelect(X, colProbs=colProbs)
Exemplo n.º 11
0
    def testLocalAucApprox(self):
        """localAUCApprox should approach the exact local AUC as the number
        of AUC samples grows."""
        m, n, k = 100, 200, 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True, verbose=True)

        w = 1.0
        exactAuc = MCEvaluator.localAUC(X, U, V, w)

        for numAucSamples in numpy.arange(150, 200, 10):
            approxAuc = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)
            self.assertAlmostEqual(approxAuc, exactAuc, 1)

        # Try smaller w
        w = 0.5
        exactAuc = MCEvaluator.localAUC(X, U, V, w)

        for numAucSamples in numpy.arange(50, 200, 10):
            approxAuc = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X), U, V, w, numAucSamples)

            self.assertAlmostEqual(approxAuc, exactAuc, 1)
Exemplo n.º 12
0
    def testLocalAucApprox(self):
        """The sampled approximation of the local AUC should agree with the
        exact computation, for two values of the quantile w."""
        m = 100
        n = 200
        k = 2
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
            (m, n), k, csarray=True, verbose=True)

        # Each pair is (quantile, first sample count of the sweep)
        for w, sampleStart in [(1.0, 150), (0.5, 50)]:
            exact = MCEvaluator.localAUC(X, U, V, w)

            for numSamples in numpy.arange(sampleStart, 200, 10):
                approx = MCEvaluator.localAUCApprox(
                    SparseUtils.getOmegaListPtr(X), U, V, w, numSamples)

                self.assertAlmostEqual(approx, exact, 1)
Exemplo n.º 13
0
    def testF1Atk(self):
        """Check MCEvaluator.f1AtK against the harmonic mean of precision
        and recall computed directly."""
        m = 10
        n = 5
        r = 3
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)

        import sppy

        X = sppy.csarray(X)
        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)

        # Recommending all n items: precision = r/n, recall = 1, so
        # F1 = 2*(r/n) / (1 + r/n).  assertAlmostEquals is a deprecated
        # alias (removed in Python 3.12).
        self.assertAlmostEqual(
            MCEvaluator.f1AtK(X, orderedItems, n, verbose=False), 2 * r / float(n) / (1 + r / float(n))
        )

        m = 20
        n = 50
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)
        k = 5

        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)
        f1s = numpy.zeros(m)

        # NOTE(review): assumes precision+recall is nonzero for every user
        # here; numpy would produce nan (with a warning) otherwise
        for i in range(m):
            f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)

        # Test case where we get a zero precision or recall
        orderedItems[5, :] = -1
        precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)

        f1s = numpy.zeros(m)

        # Guard against division by zero for the user with no valid items
        for i in range(m):
            if precision[i] + recall[i] != 0:
                f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)
Exemplo n.º 14
0
    def __init__(self):
        """Fix the random seed and build a large low rank binary dataset."""
        numpy.random.seed(21)

        # Create a low rank matrix
        self.m = 1000
        self.n = 5000
        self.k = 10
        self.X = SparseUtils.generateSparseBinaryMatrix(
            (self.m, self.n), self.k, csarray=True)
Exemplo n.º 15
0
 def __init__(self):
     """Seed the RNG and create the low rank test matrix."""
     numpy.random.seed(21)

     # Create a low rank matrix
     numRows, numCols = 500, 200
     self.k = 8
     self.X = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), self.k, csarray=True)
     print(self.X)
Exemplo n.º 16
0
    def testSampleUsers2(self):
        """Check Sampling.sampleUsers2: identity when k exceeds nnz, column
        pruning behaviour, and row-subset consistency on random inputs."""
        m = 10
        n = 15
        r = 5
        u = 0.3
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, w, csarray=True, verbose=True, indsPerRow=200)

        # k exceeds the number of nonzeros, so every user is kept
        k = X.nnz + 100
        X2, userInds = Sampling.sampleUsers2(X, k)

        nptst.assert_array_equal(X.toarray(), X2.toarray())

        # Test pruning of cols
        k = 500
        m = 100
        n = 500
        u = 0.1
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, w, csarray=True, verbose=True, indsPerRow=200)
        numpy.random.seed(21)
        X2, userInds = Sampling.sampleUsers2(X, k, prune=True)
        nnz1 = X2.nnz
        # After pruning, no column may be completely empty
        self.assertTrue((X2.sum(0) != 0).all())

        numpy.random.seed(21)
        X2, userInds = Sampling.sampleUsers2(X, k, prune=False)
        nnz2 = X2.nnz
        # Same seed: pruning empty columns must not discard any nonzeros.
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(nnz1, nnz2)

        numRuns = 50
        for i in range(numRuns):
            m = numpy.random.randint(10, 100)
            n = numpy.random.randint(10, 100)
            k = 500

            X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, w, csarray=True, verbose=True, indsPerRow=200)

            X2, userInds = Sampling.sampleUsers2(X, k)

            # Bug fix: .all() must apply to the elementwise comparison, not to
            # numpy.zeros(...) (whose .all() is always False).
            self.assertTrue((X.dot(X.T) != numpy.zeros((m, m))).all())
            self.assertTrue((X2.toarray() == X.toarray()[userInds, :]).all())
            self.assertEqual(X.toarray()[userInds, :].nonzero()[0].shape[0], X2.nnz)
Exemplo n.º 17
0
    def testScale(self):
        """
        Look at the scales of the unnormalised gradients.
        """

        numRows = 100
        numCols = 400
        rank = 3
        X = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), rank, csarray=True)

        w = 0.1
        eps = 0.001  # kept for parity with the original setup
        learner = MaxAUCTanh(rank, w)
        learner.normalise = False
        learner.lmbdaU = 1.0
        learner.lmbdaV = 1.0
        learner.rho = 1.0
        learner.numAucSamples = 100

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        r = numpy.random.rand(numRows)

        U = numpy.random.rand(X.shape[0], rank)
        V = numpy.random.rand(X.shape[1], rank)

        # Random weight vectors, each normalised to sum to one
        gi = numpy.random.rand(numRows)
        gi /= gi.sum()
        gp = numpy.random.rand(numCols)
        gp /= gp.sum()
        gq = numpy.random.rand(numCols)
        gq /= gq.sum()

        # These draws keep the RNG stream aligned with the original test
        permutedRowInds = numpy.array(numpy.random.permutation(numRows), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(numCols), numpy.uint32)

        maxLocalAuc = MaxLocalAUC(rank, w)
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, numRows)

        # Mean Euclidean norm of the per-user gradients
        normDui = 0
        for rowInd in range(numRows):
            du = learner.derivativeUi(indPtr, colInds, U, V, r, gi, gp, gq, rowInd)
            normDui += numpy.linalg.norm(du)

        normDui /= float(numRows)
        print(normDui)

        # Mean Euclidean norm of the per-item gradients
        normDvi = 0

        for colInd in range(numCols):
            dv = learner.derivativeVi(indPtr, colInds, U, V, r, gi, gp, gq, colInd)
            normDvi += numpy.linalg.norm(dv)

        normDvi /= float(numCols)
        print(normDvi)
Exemplo n.º 18
0
    def __init__(self):
        """Seed the RNG and construct the shared low rank dataset."""
        numpy.random.seed(21)

        # Create a low rank matrix
        self.m, self.n, self.k = 1000, 5000, 10
        self.X = SparseUtils.generateSparseBinaryMatrix((self.m, self.n), self.k, csarray=True)
Exemplo n.º 19
0
    def testScale(self): 
        """
        Look at the scales of the unnormalised gradients. 
        """        
        
        m = 100 
        n = 400 
        k = 3 
        # Rank-k sparse binary matrix used as the dataset
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)
        
        w = 0.1
        # NOTE(review): eps appears to be unused in this test
        eps = 0.001
        learner = MaxAUCTanh(k, w)
        # Normalisation disabled so the raw gradient magnitudes are observed
        learner.normalise = False
        learner.lmbdaU = 1.0
        learner.lmbdaV = 1.0
        learner.rho = 1.0
        learner.numAucSamples = 100
        
        # Pointer/index arrays over the observed (nonzero) entries of X
        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        r = numpy.random.rand(m)

        # Random factors with the shapes the learner expects
        U = numpy.random.rand(X.shape[0], k)
        V = numpy.random.rand(X.shape[1], k)
        
        # Random weight vectors, each normalised to sum to one
        gi = numpy.random.rand(m)
        gi /= gi.sum()        
        gp = numpy.random.rand(n)
        gp /= gp.sum()        
        gq = numpy.random.rand(n)
        gq /= gq.sum()     
        
        # NOTE(review): the permutations below are not used later in the test
        permutedRowInds = numpy.array(numpy.random.permutation(m), numpy.uint32)
        permutedColInds = numpy.array(numpy.random.permutation(n), numpy.uint32)
        
        maxLocalAuc = MaxLocalAUC(k, w)
        # NOTE(review): normGp/normGq are computed but not used afterwards
        normGp, normGq = maxLocalAuc.computeNormGpq(indPtr, colInds, gp, gq, m)
        
        # Average Euclidean norm of the per-user gradient derivativeUi
        normDui = 0
        for i in range(m): 
            du = learner.derivativeUi(indPtr, colInds, U, V, r, gi, gp, gq, i) 
            normDui += numpy.linalg.norm(du)
            
        normDui /= float(m)
        print(normDui)        
        
        # Average Euclidean norm of the per-item gradient derivativeVi
        normDvi = 0         
        
        for i in range(n): 
            dv = learner.derivativeVi(indPtr, colInds, U, V, r, gi, gp, gq, i) 
            normDvi += numpy.linalg.norm(dv)
            
        normDvi /= float(n)
        print(normDvi)
Exemplo n.º 20
0
 def setUp(self):
     """Configure printing/logging and build a shared binary test matrix."""
     numpy.set_printoptions(precision=3, suppress=True)
     numpy.random.seed(21)
     logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

     self.m = 30
     self.n = 20
     rank = 5
     u = 0.1
     # Quantile for binarising the low rank matrix
     self.X = SparseUtils.generateSparseBinaryMatrix((self.m, self.n), rank, 1 - u, csarray=True)
     self.X.prune()
Exemplo n.º 21
0
    def __init__(self):
        """Seed the RNG, build the dataset and clear any CPU affinity mask."""
        numpy.random.seed(21)

        # Create a low rank matrix
        numRows, numCols = 500, 200
        self.k = 8
        self.X = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), self.k, csarray=True)

        # Allow this process to run on every CPU
        os.system('taskset -p 0xffffffff %d' % os.getpid())
Exemplo n.º 22
0
    def profileShuffleSplitRows(self):
        """Profile Sampling.shuffleSplitRows on a large synthetic matrix."""
        m = 10000
        n = 5000
        k = 5
        u = 0.1
        w = 1 - u
        # Bug fix: with verbose=True the generator returns five values
        # (X, U, s, V, wv) — the original unpacked only four, which raises
        # ValueError (see the other verbose=True call sites in this file).
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True, verbose=True, indsPerRow=200)

        k2 = 10
        testSize = 2

        # The profiled string is evaluated against locals(), so the names
        # X, k2 and testSize must not be renamed.
        ProfileUtils.profile('Sampling.shuffleSplitRows(X, k2, testSize)', globals(), locals())
Exemplo n.º 23
0
    def testPrecisionAtK(self):
        """Compare MCEvaluator.precisionAtK with a direct computation.

        Fixes: deprecated assertEquals/assertAlmostEquals aliases replaced,
        and the random V is generated with n rows (one per item), not m.
        """
        m = 10
        n = 5
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)

        #print(MCEvaluator.precisionAtK(X, U*s, V, 2))

        # Recommending all n items: precision equals the matrix density
        orderedItems = MCEvaluator.recommendAtk(U, V, n)
        self.assertAlmostEqual(MCEvaluator.precisionAtK(X, orderedItems, n),
                               X.nnz / float(m * n))

        k = 2
        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X,
                                                        orderedItems,
                                                        k,
                                                        verbose=True)

        # Manual per-user precision: |top-k intersect observed| / k
        precisions = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            precisions[i] = numpy.intersect1d(scoreInds[i, :],
                                              nonzeroRow).shape[0] / float(k)

        self.assertEqual(precision.mean(), precisions.mean())

        #Now try random U and V
        U = numpy.random.rand(m, 3)
        # Bug fix: V must have one row per item (n rows, not m)
        V = numpy.random.rand(n, 3)

        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X,
                                                        orderedItems,
                                                        k,
                                                        verbose=True)

        precisions = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            precisions[i] = numpy.intersect1d(scoreInds[i, :],
                                              nonzeroRow).shape[0] / float(k)

        self.assertEqual(precision.mean(), precisions.mean())
Exemplo n.º 24
0
    def testRecallAtK(self):
        """Compare MCEvaluator.recallAtK with a direct computation.

        Fixes: deprecated assertEquals/assertAlmostEquals aliases replaced,
        and the random V is generated with n rows (one per item), not m.
        """
        m = 10
        n = 5
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)

        # Recommending all n items must recover every observed item
        orderedItems = MCEvaluator.recommendAtk(U, V, n)
        self.assertAlmostEqual(MCEvaluator.recallAtK(X, orderedItems, n), 1.0)

        k = 2
        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        recall, scoreInds = MCEvaluator.recallAtK(X,
                                                  orderedItems,
                                                  k,
                                                  verbose=True)

        # Manual per-user recall: |top-k intersect observed| / |observed|
        recalls = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            recalls[i] = numpy.intersect1d(scoreInds[i, :],
                                           nonzeroRow).shape[0] / float(
                                               nonzeroRow.shape[0])

        self.assertEqual(recall.mean(), recalls.mean())

        #Now try random U and V
        U = numpy.random.rand(m, 3)
        # Bug fix: V must have one row per item (n rows, not m)
        V = numpy.random.rand(n, 3)

        orderedItems = MCEvaluator.recommendAtk(U, V, k)
        recall, scoreInds = MCEvaluator.recallAtK(X,
                                                  orderedItems,
                                                  k,
                                                  verbose=True)

        recalls = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            recalls[i] = numpy.intersect1d(scoreInds[i, :],
                                           nonzeroRow).shape[0] / float(
                                               nonzeroRow.shape[0])

        self.assertEqual(recall.mean(), recalls.mean())
Exemplo n.º 25
0
    def profileSampleUsers(self): 
        """Profile Sampling.sampleUsers2 on a large synthetic binary matrix."""
        m = 10000
        n = 50000
        k = 5 
        u = 0.01
        w = 1-u
        # verbose=True returns the matrix plus its factors and quantiles
        X, U, s, V, wv  = SparseUtils.generateSparseBinaryMatrix((m,n), k, w, csarray=True, verbose=True, indsPerRow=200)

        print(X.nnz)

        # Number of nonzeros/users requested from the sampler
        k2 = 100000

        # NOTE: the profiled string is evaluated against locals(), so the
        # names X and k2 must not be renamed
        ProfileUtils.profile('Sampling.sampleUsers2(X, k2)', globals(), locals())      
Exemplo n.º 26
0
    def testGenerateSparseBinaryMatrix(self):
        """Check generateSparseBinaryMatrix for scipy and csarray outputs,
        for quantile 0 (dense ones) and for per-row quantile variation."""
        m = 5 
        n = 10 
        k = 3
        quantile = 0.7
        numpy.random.seed(21)
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile)
        Xscipy = numpy.array(X.todense()) 
        
        # With quantile 0.7 on n=10 columns, each row keeps 3 entries
        nptst.assert_array_equal(numpy.array(X.sum(1)).flatten(), numpy.ones(m)*3)
        
        # Quantile 0 keeps (almost) everything: X is close to the all-ones matrix
        quantile = 0.0 
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile)
        self.assertTrue(numpy.linalg.norm(X - numpy.ones((m,n))) < 1.1)
        #nptst.assert_array_almost_equal(X.todense(), numpy.ones((m,n)))
        
        # Same checks with the csarray backend; the seed is reset so the
        # result must match the scipy version exactly
        quantile = 0.7
        numpy.random.seed(21)
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile, csarray=True)
        Xcsarray = X.toarray()
        
        nptst.assert_array_equal(numpy.array(X.sum(1)).flatten(), numpy.ones(m)*3)
        
        quantile = 0.0 
        X = SparseUtils.generateSparseBinaryMatrix((m,n), k, quantile, csarray=True)
        self.assertTrue(numpy.linalg.norm(X.toarray() - numpy.ones((m,n))) < 1.1)
        #nptst.assert_array_almost_equal(X.toarray(), numpy.ones((m,n)))
        
        nptst.assert_array_equal(Xcsarray, Xscipy)
        
        #Test variation in the quantiles 
        w = 0.7
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), k, w, sd=0.1, csarray=True, verbose=True)
        
        # Rebuild X by thresholding each row of the low rank scores at its
        # own quantile wv[i]; the generator must agree with this
        Z = (U*s).dot(V.T)
        X2 = numpy.zeros((m, n))
        r2 = numpy.zeros(m)
        for i in range(m): 
            r2[i] = numpy.percentile(numpy.sort(Z[i, :]), wv[i]*100)
            X2[i, Z[i, :]>r2[i]] = 1 
        r = SparseUtilsCython.computeR2(U*s, V, wv)

        nptst.assert_array_almost_equal(X.toarray(), X2)
        nptst.assert_array_almost_equal(r, r2)
        
        #Test a larger standard deviation
        w = 0.7
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), k, w, sd=0.5, csarray=True, verbose=True)
        
        # NOTE(review): here the threshold comparison is >= rather than > as
        # above — presumably intentional, matching the generator's behaviour
        Z = (U*s).dot(V.T)
        X2 = numpy.zeros((m, n))
        r2 = numpy.zeros(m)
        for i in range(m): 
            r2[i] = numpy.percentile(numpy.sort(Z[i, :]), wv[i]*100)
            X2[i, Z[i, :]>=r2[i]] = 1 
        r = SparseUtilsCython.computeR2(U*s, V, wv)

        nptst.assert_array_almost_equal(X.toarray(), X2)
        nptst.assert_array_almost_equal(r, r2)
Exemplo n.º 27
0
 def syntheticDataset1(m=500, n=200, k=8, u=0.1, sd=0, noise=5):
     """
     Create a simple synthetic dataset
     """
     quantile = 1 - u
     X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, quantile, sd=sd, csarray=True, verbose=True, indsPerRow=200)
     # Sprinkle uniform random noise entries, then binarise the result
     X = X + sppy.rand((m, n), noise/float(n), storagetype="row")
     X[X.nonzero()] = 1
     X.prune()
     # Drop users with fewer than 10 observations
     X = SparseUtils.pruneMatrixRows(X, minNnzRows=10)
     logging.debug("Non zero elements: " + str(X.nnz) + " shape: " + str(X.shape))
     U = U*s

     return X, U, V
Exemplo n.º 28
0
 def testModelSelect(self):
     """CLiMF model selection should run on a small random binary matrix."""
     numRows, numCols, rank = 50, 20, 5
     quantile = 1 - 0.1
     X = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), rank, quantile)

     lmbda = 0.1
     gamma = 0.01
     learner = CLiMF(rank, lmbda, gamma)
     learner.max_iters = 10

     learner.modelSelect(X)
Exemplo n.º 29
0
    def testLearnModel(self):
        """BprRecommender.learnModel should run and produce predictions."""
        numRows, numCols, rank = 50, 20, 5
        quantile = 1 - 0.1
        X = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), rank, quantile, csarray=True)

        learner = BprRecommender(rank, 0.1, 0.01)
        learner.max_iters = 50

        learner.learnModel(X)
        Z = learner.predict(numCols)
Exemplo n.º 30
0
 def testLearnModel(self):
     """Smoke test: fit BPR on random data and compute top-n predictions."""
     m = 50
     n = 20
     k = 5
     w = 1 - 0.1  # quantile used to binarise the low rank matrix
     X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

     model = BprRecommender(k, 0.1, 0.01)
     model.max_iters = 50

     model.learnModel(X)
     Z = model.predict(n)
Exemplo n.º 31
0
    def testLearningRateSelect(self):
        """learningRateSelect should run with the 'optimal' rate schedule."""
        m, n, k = 10, 20, 5

        u = 0.5
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, 1 - u, csarray=True)

        maxLocalAuc = MaxLocalAUC(k, u, eps=0.001, stochastic=True)
        maxLocalAuc.rate = "optimal"
        maxLocalAuc.maxIterations = 5
        maxLocalAuc.numProcesses = 1

        maxLocalAuc.learningRateSelect(X)
Exemplo n.º 32
0
 def testLearningRateSelect(self):
     """Smoke test for MaxLocalAUC.learningRateSelect on random data."""
     numRows = 10
     numCols = 20
     rank = 5

     u = 0.5
     quantile = 1 - u
     X = SparseUtils.generateSparseBinaryMatrix((numRows, numCols), rank, quantile, csarray=True)

     eps = 0.001
     learner = MaxLocalAUC(rank, u, eps=eps, stochastic=True)
     learner.rate = "optimal"
     learner.maxIterations = 5
     learner.numProcesses = 1

     learner.learningRateSelect(X)
Exemplo n.º 33
0
    def testAverageAuc(self):
        """averageAuc should equal localAUC evaluated with u = 0."""
        m = 50
        n = 20
        k = 8
        u = 20.0 / m
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix(
            (m, n), k, w, csarray=True, verbose=True, indsPerRow=200
        )

        auc = MCEvaluator.averageAuc(X, U, V)

        u = 0.0
        auc2 = MCEvaluator.localAUC(X, U, V, u)

        # assertAlmostEquals is a deprecated alias (removed in Python 3.12)
        self.assertAlmostEqual(auc, auc2)
Exemplo n.º 34
0
    def testLocalAucApprox2(self):
        """localAUCApprox should approach the exact local AUC as the number
        of samples grows, and agree with a high-sample approximation.

        Cleanup: the original created numInds, r and Z locals that were
        never used; they are removed as dead code (none consumed the RNG).
        """
        m = 100
        n = 200
        k = 5
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                csarray=True,
                                                                verbose=True)

        w = 0.5
        localAuc = MCEvaluator.localAUC(X, U, V, w)

        samples = numpy.arange(50, 200, 10)

        for i, sampleSize in enumerate(samples):
            localAuc2 = MCEvaluator.localAUCApprox(
                SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)

            self.assertAlmostEqual(localAuc2, localAuc, 1)

        #Test more accurately
        sampleSize = 1000
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X),
                                               U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 2)

        # Second pass: the reference value is itself a high-sample
        # approximation rather than the exact local AUC
        localAuc = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X),
                                              U, V, w, sampleSize)

        for i, sampleSize in enumerate(samples):
            localAuc2 = MCEvaluator.localAUCApprox(
                SparseUtils.getOmegaListPtr(X), U, V, w, sampleSize)

            self.assertAlmostEqual(localAuc2, localAuc, 1)

        #Test more accurately
        sampleSize = 1000
        localAuc2 = MCEvaluator.localAUCApprox(SparseUtils.getOmegaListPtr(X),
                                               U, V, w, sampleSize)
        self.assertAlmostEqual(localAuc2, localAuc, 2)
Exemplo n.º 35
0
    def testRecommendAtk(self):
        """recommendAtk must match a manual top-k of U.dot(V.T), and must
        exclude items listed in omegaList when it is supplied."""
        m = 20
        n = 50
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)

        k = 10
        orderedItems, scores = MCEvaluator.recommendAtk(U, V, k, verbose=True)

        #Now do it manually
        Z = U.dot(V.T)

        orderedItems2 = Util.argmaxN(Z, k)
        scores2 = numpy.fliplr(numpy.sort(Z, 1))[:, 0:k]

        nptst.assert_array_equal(orderedItems, orderedItems2)
        nptst.assert_array_equal(scores, scores2)

        #Test case where we have a set of training indices to remove
        #Let's create a random omegaList
        omegaList = []
        for i in range(m):
            omegaList.append(numpy.random.permutation(n)[0:5])

        orderedItems = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k)

        #print(omegaList)
        #print(orderedItems)
        #print(orderedItems2)

        for i in range(m):
            # Excluded (training) items must never be recommended.
            # assertEquals is a deprecated alias (removed in Python 3.12).
            items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
            self.assertEqual(items.shape[0], 0)

            # Every unrestricted recommendation is either excluded or still
            # present in the restricted recommendations
            items = numpy.union1d(omegaList[i], orderedItems[i, :])
            items = numpy.intersect1d(items, orderedItems2[i, :])
            nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))
Exemplo n.º 36
0
    def testReciprocalRankAtk(self):
        """Compare the Cython reciprocal-rank@k with a pure-Python reference
        implementation, plus the all-miss and rank-2 special cases."""
        m, n, r = 20, 50, 3
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5,
                                                                verbose=True,
                                                                csarray=True)

        k = 5
        # Random int32 ranking matrix of shape (m, k)
        orderedItems = numpy.random.randint(0, n, m * k).reshape(m, k).astype(numpy.int32)

        (indPtr, colInds) = X.nonzeroRowsPtr()
        indPtr = numpy.array(indPtr, numpy.uint32)
        colInds = numpy.array(colInds, numpy.uint32)
        rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds,
                                                  orderedItems)

        # Pure-Python reference: reciprocal rank of the first relevant item
        expected = numpy.zeros(m)
        for i in range(m):
            rowOmega = colInds[indPtr[i]:indPtr[i + 1]]
            for rank, item in enumerate(orderedItems[i, :]):
                if item in rowOmega:
                    expected[i] = 1.0 / (rank + 1)
                    break

        nptst.assert_array_equal(rrs, expected)

        # No ranked item occurs in any row: all reciprocal ranks are zero
        orderedItems = numpy.ones((m, k), numpy.int32) * (n + 1)
        rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds,
                                                  orderedItems)
        nptst.assert_array_equal(rrs, numpy.zeros(m))

        # Place a relevant item at rank 2 in every row: reciprocal rank is 0.5
        for i in range(m):
            rowOmega = colInds[indPtr[i]:indPtr[i + 1]]
            orderedItems[i, 1] = rowOmega[0]

        rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds,
                                                  orderedItems)
        nptst.assert_array_equal(rrs, numpy.ones(m) * 0.5)
Exemplo n.º 37
0
    def profileShuffleSplitRows(self):
        """Profile Sampling.shuffleSplitRows on a large sparse binary matrix."""
        m, n = 10000, 5000
        k = 5
        u = 0.1
        w = 1 - u
        X, U, s, V = SparseUtils.generateSparseBinaryMatrix((m, n), k, w,
                                                            csarray=True,
                                                            verbose=True,
                                                            indsPerRow=200)

        # X, k2 and testSize are picked up by the profiled expression via locals()
        k2 = 10
        testSize = 2

        ProfileUtils.profile('Sampling.shuffleSplitRows(X, k2, testSize)',
                             globals(), locals())
Exemplo n.º 38
0
    def testModelSelect(self):
        """Smoke test for WeightedMf.modelSelect on a small random binary matrix."""
        m, n = 50, 50
        k = 5
        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w)

        # Allow the process to use all CPUs (undo any inherited affinity mask)
        os.system('taskset -p 0xffffffff %d' % os.getpid())

        learner = WeightedMf(k)
        learner.maxIterations = 10
        learner.ks = 2 ** numpy.arange(3, 5)
        learner.folds = 2

        learner.modelSelect(X)
Exemplo n.º 39
0
 def testLearnModel(self):
     """Smoke test: fit CLiMF on a random binary matrix and predict scores."""
     m, n = 50, 20
     k = 5
     u = 0.1
     w = 1 - u
     X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w)

     lmbda = 0.1
     gamma = 0.01
     learner = CLiMF(k, lmbda, gamma)
     learner.max_iters = 50

     learner.learnModel(X)
     Z = learner.predict(n)

     # Bit weird that all rows are the same
     print(Z)
Exemplo n.º 40
0
    def testRecommendAtk(self):
        """Check MCEvaluatorCython.recommendAtk agrees with MCEvaluator.recommendAtk
        when training entries are excluded, and when X is all zeros."""
        m = 20
        n = 50
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)

        k = 10

        # Replace X with a matrix whose nonzeros are exactly the omegaList entries
        X = numpy.zeros(X.shape)
        omegaList = []
        for i in range(m):
            omegaList.append(numpy.random.permutation(n)[0:5])
            X[i, omegaList[i]] = 1

        X = sppy.csarray(X)

        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)

        # Both implementations must agree wherever the Python one produced an item
        nptst.assert_array_equal(orderedItems[orderedItems2 != -1],
                                 orderedItems2[orderedItems2 != -1])

        for i in range(m):
            # Training items must never be recommended
            items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
            # Fixed: assertEquals is a deprecated alias of assertEqual
            self.assertEqual(items.shape[0], 0)

        # Now let's have an all zeros X: the two implementations must match exactly
        X = sppy.csarray(X.shape)
        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k)

        nptst.assert_array_equal(orderedItems, orderedItems2)
Exemplo n.º 41
0
    def profileSampleUsers(self):
        """Profile Sampling.sampleUsers2 on a large random sparse matrix."""
        m, n = 10000, 50000
        k = 5
        u = 0.01
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), k, w,
                                                                csarray=True,
                                                                verbose=True,
                                                                indsPerRow=200)

        print(X.nnz)

        # X and k2 are picked up by the profiled expression via locals()
        k2 = 100000

        ProfileUtils.profile('Sampling.sampleUsers2(X, k2)', globals(),
                             locals())
Exemplo n.º 42
0
    def testLearnModel(self):
        """Smoke test MaxLocalAUC.learnModel in batch and stochastic modes,
        with and without a validation set."""
        m, n = 50, 20
        k = 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        u = 0.1
        w = 1 - u
        eps = 0.05

        # Batch gradient descent
        maxLocalAuc = MaxLocalAUC(k, w, alpha=5.0, eps=eps, stochastic=False)
        U, V = maxLocalAuc.learnModel(X)

        # Same learner, stochastic updates
        maxLocalAuc.stochastic = True
        U, V = maxLocalAuc.learnModel(X)

        # Test case where we do not have a validation set
        maxLocalAuc.validationUsers = 0.0
        U, V = maxLocalAuc.learnModel(X)
Exemplo n.º 43
0
    def testAverageAuc(self):
        """averageAuc should equal localAUC evaluated with quantile u = 0
        (i.e. plain AUC over all items)."""
        m = 50
        n = 20
        k = 8
        u = 20.0 / m
        w = 1 - u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                k,
                                                                w,
                                                                csarray=True,
                                                                verbose=True,
                                                                indsPerRow=200)

        auc = MCEvaluator.averageAuc(X, U, V)

        u = 0.0
        auc2 = MCEvaluator.localAUC(X, U, V, u)

        # Fixed: assertAlmostEquals is a deprecated alias of assertAlmostEqual
        self.assertAlmostEqual(auc, auc2)
Exemplo n.º 44
0
    def testRecommendAtk(self):
        """Check MCEvaluator.recommendAtk against a manual top-k ranking of Z = U V^T,
        with and without a list of training indices to exclude."""
        m = 20
        n = 50
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)

        import sppy

        X = sppy.csarray(X)

        k = 10
        orderedItems, scores = MCEvaluator.recommendAtk(U, V, k, verbose=True)

        # Compute the expected top-k items/scores per row from the full score matrix
        Z = U.dot(V.T)

        orderedItems2 = Util.argmaxN(Z, k)
        scores2 = numpy.fliplr(numpy.sort(Z, 1))[:, 0:k]

        nptst.assert_array_equal(orderedItems, orderedItems2)
        nptst.assert_array_equal(scores, scores2)

        # Test case where we have a set of training indices to remove:
        # build a random omegaList of 5 training items per user
        omegaList = []
        for i in range(m):
            omegaList.append(numpy.random.permutation(n)[0:5])

        orderedItems = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k)

        for i in range(m):
            # No training item may appear in the filtered recommendations
            items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
            # Fixed: assertEquals is a deprecated alias of assertEqual
            self.assertEqual(items.shape[0], 0)

            # Filtered recommendations plus omega must cover the unfiltered top-k
            items = numpy.union1d(omegaList[i], orderedItems[i, :])
            items = numpy.intersect1d(items, orderedItems2[i, :])
            nptst.assert_array_equal(items, numpy.sort(orderedItems2[i, :]))
Exemplo n.º 45
0
    def testLearnModel(self):
        """Smoke test MaxLocalAUC.learnModel in batch and stochastic modes,
        with and without a validation set."""
        m, n = 50, 20
        k = 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        u = 0.1
        w = 1 - u
        eps = 0.05

        # Batch gradient descent
        maxLocalAuc = MaxLocalAUC(k, w, alpha=5.0, eps=eps, stochastic=False)
        U, V = maxLocalAuc.learnModel(X)

        # Same learner, stochastic updates
        maxLocalAuc.stochastic = True
        U, V = maxLocalAuc.learnModel(X)

        # Test case where we do not have a validation set
        maxLocalAuc.validationUsers = 0.0
        U, V = maxLocalAuc.learnModel(X)
Exemplo n.º 46
0
    def testPrecisionAtK(self):
        """Check MCEvaluator.precisionAtK against a manually computed precision."""
        m = 10
        n = 5
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)

        import sppy

        X = sppy.csarray(X)

        # Recommending every item gives precision nnz / (m * n)
        orderedItems = MCEvaluator.recommendAtk(U, V, n)
        # Fixed: assertAlmostEquals is a deprecated alias of assertAlmostEqual
        self.assertAlmostEqual(MCEvaluator.precisionAtK(X, orderedItems, n), X.nnz / float(m * n))

        k = 2
        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)

        precisions = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            precisions[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(k)

        self.assertEqual(precision.mean(), precisions.mean())

        # Now try random U and V
        U = numpy.random.rand(m, 3)
        # Fixed: V must have one row per item (n), not per user (m); with shape
        # (m, 3) and m > n, recommendAtk could return item indices >= n
        V = numpy.random.rand(n, 3)

        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X, orderedItems, k, verbose=True)

        precisions = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            precisions[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(k)

        self.assertEqual(precision.mean(), precisions.mean())
Exemplo n.º 47
0
    def testModelSelectMaxNorm(self):
        """Smoke test for MaxLocalAUC.modelSelectNorm with the f1 metric."""
        m, n = 10, 20
        k = 5

        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

        # Allow the process to use all CPUs (undo any inherited affinity mask)
        os.system('taskset -p 0xffffffff %d' % os.getpid())

        eps = 0.001
        k = 5
        maxLocalAuc = MaxLocalAUC(k, w, eps=eps, stochastic=True)
        maxLocalAuc.maxIterations = 5
        maxLocalAuc.recordStep = 1
        maxLocalAuc.validationSize = 3
        maxLocalAuc.metric = "f1"

        maxLocalAuc.modelSelectNorm(X)
Exemplo n.º 48
0
 def testModelSelectMaxNorm(self):
     """Smoke test for MaxLocalAUC.modelSelectNorm with the f1 metric."""
     m, n = 10, 20
     k = 5

     u = 0.5
     w = 1 - u
     X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

     # Allow the process to use all CPUs (undo any inherited affinity mask)
     os.system('taskset -p 0xffffffff %d' % os.getpid())

     eps = 0.001
     k = 5
     maxLocalAuc = MaxLocalAUC(k, w, eps=eps, stochastic=True)
     maxLocalAuc.maxIterations = 5
     maxLocalAuc.recordStep = 1
     maxLocalAuc.validationSize = 3
     maxLocalAuc.metric = "f1"

     maxLocalAuc.modelSelectNorm(X)
Exemplo n.º 49
0
    def testRecommendAtk(self):
        """Check MCEvaluatorCython.recommendAtk agrees with MCEvaluator.recommendAtk
        when training entries are excluded, and when X is all zeros."""
        m = 20
        n = 50
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)

        import sppy
        X = sppy.csarray(X)

        k = 10

        # Replace X with a matrix whose nonzeros are exactly the omegaList entries
        X = numpy.zeros(X.shape)
        omegaList = []
        for i in range(m):
            omegaList.append(numpy.random.permutation(n)[0:5])
            X[i, omegaList[i]] = 1

        X = sppy.csarray(X)

        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k, omegaList=omegaList)

        # Both implementations must agree wherever the Python one produced an item
        nptst.assert_array_equal(orderedItems[orderedItems2 != -1],
                                 orderedItems2[orderedItems2 != -1])

        for i in range(m):
            # Training items must never be recommended
            items = numpy.intersect1d(omegaList[i], orderedItems[i, :])
            # Fixed: assertEquals is a deprecated alias of assertEqual
            self.assertEqual(items.shape[0], 0)

        # Now let's have an all zeros X: the two implementations must match exactly
        X = sppy.csarray(X.shape)
        orderedItems = MCEvaluatorCython.recommendAtk(U, V, k, X)
        orderedItems2 = MCEvaluator.recommendAtk(U, V, k)

        nptst.assert_array_equal(orderedItems, orderedItems2)
Exemplo n.º 50
0
    def testRecallAtK(self):
        """Check MCEvaluator.recallAtK against a manually computed recall."""
        m = 10
        n = 5
        r = 3

        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True)

        import sppy

        X = sppy.csarray(X)

        # Recommending every item recovers all relevant ones: recall = 1
        orderedItems = MCEvaluator.recommendAtk(U, V, n)
        # Fixed: assertAlmostEquals is a deprecated alias of assertAlmostEqual
        self.assertAlmostEqual(MCEvaluator.recallAtK(X, orderedItems, n), 1.0)

        k = 2
        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)

        recalls = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            recalls[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(nonzeroRow.shape[0])

        self.assertEqual(recall.mean(), recalls.mean())

        # Now try random U and V
        U = numpy.random.rand(m, 3)
        # Fixed: V must have one row per item (n), not per user (m); with shape
        # (m, 3) and m > n, recommendAtk could return item indices >= n
        V = numpy.random.rand(n, 3)

        orderedItems = MCEvaluator.recommendAtk(U, V, k)
        recall, scoreInds = MCEvaluator.recallAtK(X, orderedItems, k, verbose=True)

        recalls = numpy.zeros(m)
        for i in range(m):
            nonzeroRow = X.toarray()[i, :].nonzero()[0]

            recalls[i] = numpy.intersect1d(scoreInds[i, :], nonzeroRow).shape[0] / float(nonzeroRow.shape[0])

        self.assertEqual(recall.mean(), recalls.mean())
Exemplo n.º 51
0
 def testReciprocalRankAtk(self):
     """Compare the Cython reciprocal-rank@k with a pure-Python reference
     implementation, plus the all-miss and rank-2 special cases."""
     m, n, r = 20, 50, 3
     X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n), r, 0.5, verbose=True, csarray=True)

     k = 5
     # Random int32 ranking matrix of shape (m, k)
     orderedItems = numpy.random.randint(0, n, m * k).reshape(m, k).astype(numpy.int32)

     (indPtr, colInds) = X.nonzeroRowsPtr()
     indPtr = numpy.array(indPtr, numpy.uint32)
     colInds = numpy.array(colInds, numpy.uint32)
     rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds, orderedItems)

     # Pure-Python reference: reciprocal rank of the first relevant item
     expected = numpy.zeros(m)
     for i in range(m):
         rowOmega = colInds[indPtr[i]:indPtr[i + 1]]
         for rank, item in enumerate(orderedItems[i, :]):
             if item in rowOmega:
                 expected[i] = 1.0 / (rank + 1)
                 break

     nptst.assert_array_equal(rrs, expected)

     # No ranked item occurs in any row: all reciprocal ranks are zero
     orderedItems = numpy.ones((m, k), numpy.int32) * (n + 1)
     rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds, orderedItems)
     nptst.assert_array_equal(rrs, numpy.zeros(m))

     # Place a relevant item at rank 2 in every row: reciprocal rank is 0.5
     for i in range(m):
         rowOmega = colInds[indPtr[i]:indPtr[i + 1]]
         orderedItems[i, 1] = rowOmega[0]

     rrs = MCEvaluatorCython.reciprocalRankAtk(indPtr, colInds, orderedItems)
     nptst.assert_array_equal(rrs, numpy.ones(m) * 0.5)
Exemplo n.º 52
0
    def testRestrictOmega(self):
        """restrictOmega must only remove columns that lie outside the given
        column subset."""
        m = 50
        n = 100
        k = 5

        u = 0.5
        w = 1 - u
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, w, csarray=True)

        indPtr, colInds = SparseUtils.getOmegaListPtr(X)
        runs = 100

        # Fixed: the inner loop used to reuse the outer loop variable i,
        # shadowing the run counter
        for run in range(runs):
            colSubset = numpy.random.choice(n, 20, replace=False)

            newIndPtr, newColInds = restrictOmega(indPtr, colInds, colSubset)

            for i in range(m):
                omegai = colInds[indPtr[i]:indPtr[i + 1]]
                omegai2 = newColInds[newIndPtr[i]:newIndPtr[i + 1]]

                # Every column dropped by the restriction must not be in colSubset
                a = numpy.setdiff1d(omegai, omegai2)
                # Fixed: assertEquals is a deprecated alias of assertEqual
                self.assertEqual(numpy.intersect1d(a, colSubset).shape[0], 0)
Exemplo n.º 53
0
    def testParallelLearnModel(self):
        """Smoke test for MaxLocalAUC.parallelLearnModel on a synthetic dataset."""
        numpy.random.seed(21)
        m, n = 500, 200
        k = 5
        X = SparseUtils.generateSparseBinaryMatrix((m, n), k, csarray=True)

        # Use the shared synthetic dataset rather than the matrix generated above
        from wallhack.rankingexp.DatasetUtils import DatasetUtils
        X, U, V = DatasetUtils.syntheticDataset1()

        u = 0.1
        w = 1 - u
        eps = 0.05
        maxLocalAuc = MaxLocalAUC(k, w, alpha=1.0, eps=eps, stochastic=True)
        maxLocalAuc.maxIterations = 3
        maxLocalAuc.recordStep = 1
        maxLocalAuc.rate = "optimal"
        maxLocalAuc.t0 = 2.0
        maxLocalAuc.validationUsers = 0.0
        maxLocalAuc.numProcesses = 4

        # Allow the process to use all CPUs (undo any inherited affinity mask)
        os.system('taskset -p 0xffffffff %d' % os.getpid())
        print(X.nnz / maxLocalAuc.numAucSamples)
        U, V = maxLocalAuc.parallelLearnModel(X)
Exemplo n.º 54
0
    def profileLocalAucApprox(self):
        """Profile repeated calls to MCEvaluator.localAUCApprox."""
        m, n = 500, 1000
        k = 10
        X, U, s, V = SparseUtils.generateSparseBinaryMatrix((m, n), k,
                                                            csarray=True,
                                                            verbose=True)

        u = 0.1
        w = 1 - u
        numAucSamples = 200

        omegaList = SparseUtils.getOmegaList(X)
        r = SparseUtilsCython.computeR(U, V, w, numAucSamples)

        numRuns = 10

        # run() is looked up by name in the profiled expression via locals()
        def run():
            for _ in range(numRuns):
                MCEvaluator.localAUCApprox(X, U, V, omegaList, numAucSamples,
                                           r)

        ProfileUtils.profile('run()', globals(), locals())
Exemplo n.º 55
0
    def testF1Atk(self):
        """Check MCEvaluator.f1AtK against the harmonic mean of precisionAtK
        and recallAtK, including rows with zero precision/recall."""
        m = 10
        n = 5
        r = 3
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)

        import sppy
        X = sppy.csarray(X)
        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)

        # Recommending all n items: precision = r/n, recall = 1, so
        # f1 = 2*(r/n) / (1 + r/n)
        # Fixed: assertAlmostEquals is a deprecated alias of assertAlmostEqual
        self.assertAlmostEqual(
            MCEvaluator.f1AtK(X, orderedItems, n, verbose=False),
            2 * r / float(n) / (1 + r / float(n)))

        m = 20
        n = 50
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m, n),
                                                                r,
                                                                0.5,
                                                                verbose=True)
        k = 5

        orderedItems = MCEvaluator.recommendAtk(U * s, V, k)
        precision, scoreInds = MCEvaluator.precisionAtK(X,
                                                        orderedItems,
                                                        k,
                                                        verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X,
                                                  orderedItems,
                                                  k,
                                                  verbose=True)
        f1s = numpy.zeros(m)

        for i in range(m):
            # NOTE(review): assumes precision + recall > 0 for every row here;
            # the zero case is exercised by the guarded loop below
            f1s[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i])

        # NOTE(review): the full n-item ranking is passed while f1AtK is asked
        # for k — presumably f1AtK only uses the first k columns; verify
        orderedItems = MCEvaluator.recommendAtk(U * s, V, n)
        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)

        # Test case where we get a zero precision or recall
        orderedItems[5, :] = -1
        precision, scoreInds = MCEvaluator.precisionAtK(X,
                                                        orderedItems,
                                                        k,
                                                        verbose=True)
        recall, scoreInds = MCEvaluator.recallAtK(X,
                                                  orderedItems,
                                                  k,
                                                  verbose=True)

        f1s = numpy.zeros(m)

        for i in range(m):
            if precision[i] + recall[i] != 0:
                f1s[i] = 2 * precision[i] * recall[i] / (precision[i] +
                                                         recall[i])

        f1s2, scoreInds = MCEvaluator.f1AtK(X, orderedItems, k, verbose=True)

        nptst.assert_array_equal(f1s, f1s2)
Exemplo n.º 56
0
    def testShuffleSplitRows(self): 
        """Exercise Sampling.shuffleSplitRows: row-/column-major storage,
        scipy output, zero test size, row subsets, and column sampling
        probabilities.
        """
        m = 10
        n = 16
        k = 5 
        u = 0.5
        w = 1-u
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), k, w, csarray=True, verbose=True, indsPerRow=200)
        
        #print(X.toarray())
        
        k2 = 5 
        testSize = 2
        trainTestXs = Sampling.shuffleSplitRows(X, k2, testSize, rowMajor=True)
        
        # Each split must partition X, be stored row-major and put exactly
        # testSize entries per row into the test matrix
        for i in range(k2): 
            trainX = trainTestXs[i][0]
            testX = trainTestXs[i][1]
                        
            # NOTE(review): assertEquals is a deprecated alias of assertEqual
            self.assertEquals(trainX.storagetype, "row")
            self.assertEquals(testX.storagetype, "row")
            nptst.assert_array_almost_equal(X.toarray(), (trainX+testX).toarray())
            nptst.assert_array_equal(testX.sum(1), testSize*numpy.ones(m))
            self.assertEquals(X.nnz, trainX.nnz + testX.nnz)
        
        trainTestXs = Sampling.shuffleSplitRows(X, k2, testSize, rowMajor=False)
        
        # Same checks with column-major storage
        for i in range(k2): 
            trainX = trainTestXs[i][0]
            testX = trainTestXs[i][1]
                       
            self.assertEquals(trainX.storagetype, "col")
            self.assertEquals(testX.storagetype, "col")                       
            nptst.assert_array_almost_equal(X.toarray(), (trainX+testX).toarray())
            nptst.assert_array_equal(testX.sum(1), testSize*numpy.ones(m))
            self.assertEquals(X.nnz, trainX.nnz + testX.nnz)        
        
        # csarray=False returns scipy matrices, so sum(1) needs ravelling
        trainTestXs = Sampling.shuffleSplitRows(X, k2, testSize, csarray=False)
        for i in range(k2): 
            trainX = trainTestXs[i][0]
            testX = trainTestXs[i][1]
                        
            nptst.assert_array_almost_equal(X.toarray(), (trainX+testX).toarray())
            
            nptst.assert_array_equal(numpy.ravel(testX.sum(1)), testSize*numpy.ones(m))
            self.assertEquals(X.nnz, trainX.nnz + testX.nnz)

        # With testSize = 0 the test matrix must be empty and trainX == X
        testSize = 0
        trainTestXs = Sampling.shuffleSplitRows(X, k2, testSize)
        
        for i in range(k2): 
            trainX = trainTestXs[i][0]
            testX = trainTestXs[i][1]
                        
            nptst.assert_array_almost_equal(X.toarray(), (trainX+testX).toarray())
            nptst.assert_array_equal(testX.sum(1), testSize*numpy.ones(m))
            self.assertEquals(X.nnz, trainX.nnz + testX.nnz)
            self.assertEquals(testX.nnz, 0)
            
        #Test sampling a subset of the rows 
        testSize = 2
        numRows = 5
        trainTestXs = Sampling.shuffleSplitRows(X, k2, testSize, numRows=numRows, rowMajor=False)

        # Only numRows rows may contribute test entries
        for i in range(k2): 
            trainX = trainTestXs[i][0]
            testX = trainTestXs[i][1]
            
            nptst.assert_array_almost_equal(X.toarray(), (trainX+testX).toarray())
            self.assertEquals(numpy.nonzero(testX.sum(1))[0].shape[0], numRows)
            self.assertEquals(X.nnz, trainX.nnz + testX.nnz)
            self.assertEquals(testX.nnz, testSize*numRows)
            
        #Make sure column probabilities are correct 
        w = 0.0            
        X, U, s, V, wv = SparseUtils.generateSparseBinaryMatrix((m,n), k, w, csarray=True, verbose=True, indsPerRow=200)            
            
        testSize = 5
        k2 = 500
        # NOTE(review): numpy.float is deprecated and removed in NumPy >= 1.24;
        # use float or numpy.float64 instead
        colProbs = numpy.arange(0, n, dtype=numpy.float)+1
        colProbs /= colProbs.sum() 
        trainTestXs = Sampling.shuffleSplitRows(X, k2, testSize, colProbs=colProbs)
        
        # Empirical column frequencies over many splits should match colProbs
        colProbs2 = numpy.zeros(n)        
        
        for i in range(k2): 
            trainX = trainTestXs[i][0]
            testX = trainTestXs[i][1]
            
            colProbs2 += testX.sum(0)
        
        colProbs2 /= colProbs2.sum() 
        nptst.assert_array_almost_equal(colProbs, colProbs2, 2)
        
        #Now test when probabilities are uniform 
        colProbs = numpy.ones(n)/float(n)        
        trainTestXs = Sampling.shuffleSplitRows(X, k2, testSize, colProbs=colProbs)
        
        # colProbs=None should behave like uniform probabilities
        colProbs = None
        trainTestXs2 = Sampling.shuffleSplitRows(X, k2, testSize, colProbs=colProbs)
        
        colProbs2 = numpy.zeros(n)       
        colProbs3 = numpy.zeros(n) 
        
        for i in range(k2): 
            trainX = trainTestXs[i][0]
            testX = trainTestXs[i][1]
            colProbs2 += testX.sum(0)
            
            trainX = trainTestXs2[i][0]
            testX = trainTestXs2[i][1]
            colProbs3 += testX.sum(0)
        
        colProbs2 /= colProbs2.sum() 
        colProbs3 /= colProbs3.sum()
        nptst.assert_array_almost_equal(colProbs2, colProbs3, 2)
        
        # numRows=m must be equivalent to not passing numRows at all
        # (same random seed gives identical splits)
        numpy.random.seed(21)
        trainTestXs = Sampling.shuffleSplitRows(X, k2, testSize, numRows=m)
        numpy.random.seed(21)
        trainTestXs2 = Sampling.shuffleSplitRows(X, k2, testSize)

        nptst.assert_array_equal(trainTestXs[0][0].toarray(), trainTestXs2[0][0].toarray())
        nptst.assert_array_equal(trainTestXs[0][1].toarray(), trainTestXs2[0][1].toarray())