Exemplo n.º 1
0
    def testSparseMatrix(self):
        """
        Check SparseUtils.sparseMatrix for every matrix type / storage type pair.

        For each combination the result must have the dtype and shape of the
        dense source matrix, be of exactly the expected class, report the
        requested storage type (csarray only) and convert back to the same
        dense array.
        """
        m = 10
        n = 15

        # Dense random matrix; its nonzero entries are passed in as triplets.
        A = numpy.random.rand(m, n)
        rowInds, colInds = A.nonzero()
        vals = A[rowInds, colInds]

        # (matrix type, storage type, exact class expected for the result)
        cases = [
            ("scipy", "col", scipy.sparse.csc_matrix),
            ("scipy", "row", scipy.sparse.csr_matrix),
            ("csarray", "col", sppy.csarray),
            ("csarray", "row", sppy.csarray),
        ]

        for mattype, storagetype, expectedType in cases:
            X = SparseUtils.sparseMatrix(vals, rowInds, colInds, A.shape, mattype, storagetype=storagetype)

            # assertEqual/assertIs report both operands on failure, unlike
            # assertTrue(a == b) which only says "False is not true".
            self.assertEqual(X.dtype, A.dtype)
            self.assertEqual(X.shape, A.shape)
            self.assertIs(type(X), expectedType)
            if mattype == "csarray":
                # Only the csarray results are asked for their storage type.
                self.assertEqual(X.storagetype, storagetype)
            nptst.assert_array_equal(X.toarray(), A)
Exemplo n.º 2
0
    def testSparseMatrix(self):
        """
        Verify that SparseUtils.sparseMatrix honours the requested matrix type
        and storage type and reproduces the dense input exactly.
        """
        m = 10
        n = 15

        # Dense random matrix; its nonzero entries are passed in as triplets.
        A = numpy.random.rand(m, n)
        rowInds, colInds = A.nonzero()
        vals = A[rowInds, colInds]

        def check(mattype, storagetype, expectedType, checkStorage):
            # Build one matrix and run the shared assertions on it.
            X = SparseUtils.sparseMatrix(vals,
                                         rowInds,
                                         colInds,
                                         A.shape,
                                         mattype,
                                         storagetype=storagetype)
            # assertEqual/assertIs show the mismatching values on failure,
            # which assertTrue(a == b) does not.
            self.assertEqual(X.dtype, A.dtype)
            self.assertEqual(X.shape, A.shape)
            self.assertIs(type(X), expectedType)
            if checkStorage:
                self.assertEqual(X.storagetype, storagetype)
            nptst.assert_array_equal(X.toarray(), A)

        check("scipy", "col", scipy.sparse.csc_matrix, False)
        check("scipy", "row", scipy.sparse.csr_matrix, False)
        # The storagetype attribute is only checked on csarray results.
        check("csarray", "col", sppy.csarray, True)
        check("csarray", "row", sppy.csarray, True)
Exemplo n.º 3
0
    def shuffleSplitRows(X, k, testSize, numRows=None, csarray=True, rowMajor=True, colProbs=None):
        """
        Take a sparse binary matrix and create k random train-test splits.

        For every sampled row, at most testSize of its elements are placed in
        the test matrix and the remaining elements of that row go to the train
        matrix. Rows that are not sampled keep all their elements in the train
        matrix. Both matrices have the shape of X and contain ones at the
        selected positions. Returns sppy.csarray objects by default.

        :param X: The sparse matrix to split; its shape, nnz and the per-row index arrays from SparseUtils.getOmegaList are used.
        :param k: The number of train-test splits to generate.
        :param testSize: The maximum number of test elements taken from each sampled row.
        :param numRows: The number of rows sampled (without replacement) to contribute test elements. If None, every row is used.
        :param csarray: If True the matrices are built with type "csarray", otherwise "scipy".
        :param rowMajor: If True the matrices use "row" storage, otherwise "col".
        :param colProbs: This is the probability of choosing the corresponding column/item. If None, we assume uniform probabilities.
        :return: A list of k tuples (trainX, testX), or (trainX, testX, rowSample) when numRows was given.
        """
        # NOTE(review): there is no self/cls parameter, so this is presumably
        # declared as a static method outside this excerpt -- confirm.
        mattype = "csarray" if csarray else "scipy"
        storagetype = "row" if rowMajor else "col"

        # Identity tests are required here: "== None" on a numpy array is an
        # elementwise comparison whose result cannot be used as a condition.
        outputRows = numRows is not None
        if numRows is None:
            numRows = X.shape[0]

        trainTestXList = []
        omegaList = SparseUtils.getOmegaList(X)
        m = X.shape[0]

        for _ in range(k):
            trainInd = 0
            testInd = 0

            # Buffers are over-allocated and trimmed to the used length below
            # (assumes the train elements never exceed X.nnz in total).
            trainRowInds = numpy.zeros(X.nnz, numpy.int32)
            trainColInds = numpy.zeros(X.nnz, numpy.int32)

            testRowInds = numpy.zeros(m*testSize, numpy.int32)
            testColInds = numpy.zeros(m*testSize, numpy.int32)

            rowSample = numpy.sort(numpy.random.choice(m, numRows, replace=False))
            # Boolean mask so the per-row membership test is a single lookup
            # instead of a scan over rowSample.
            isSampled = numpy.zeros(m, dtype=bool)
            isSampled[rowSample] = True

            for j in range(m):
                omega = omegaList[j]
                numElements = omega.shape[0]

                if isSampled[j]:
                    if colProbs is None:
                        inds = numpy.random.permutation(numElements)
                    else:
                        # Out-of-place division: colProbs is never modified and
                        # integer-valued weights are accepted as well.
                        probs = colProbs[omega]
                        probs = probs / probs.sum()
                        inds = numpy.random.choice(numElements, numElements, p=probs, replace=False)
                    # The first testSize shuffled positions form the test part.
                    trainInds = inds[testSize:]
                    testInds = inds[0:testSize]
                else:
                    trainInds = numpy.arange(numElements)
                    # Builtin int replaces the removed numpy.int alias.
                    testInds = numpy.array([], int)

                numTrain = trainInds.shape[0]
                trainRowInds[trainInd:trainInd+numTrain] = j
                trainColInds[trainInd:trainInd+numTrain] = omega[trainInds]
                trainInd += numTrain

                numTest = testInds.shape[0]
                testRowInds[testInd:testInd+numTest] = j
                testColInds[testInd:testInd+numTest] = omega[testInds]
                testInd += numTest

            trainRowInds = trainRowInds[0:trainInd]
            trainColInds = trainColInds[0:trainInd]

            testRowInds = testRowInds[0:testInd]
            testColInds = testColInds[0:testInd]

            trainX = SparseUtils.sparseMatrix(numpy.ones(trainRowInds.shape[0], int), trainRowInds, trainColInds, X.shape, mattype, storagetype)
            testX = SparseUtils.sparseMatrix(numpy.ones(testRowInds.shape[0], int), testRowInds, testColInds, X.shape, mattype, storagetype)

            if outputRows:
                trainTestXList.append((trainX, testX, rowSample))
            else:
                trainTestXList.append((trainX, testX))

        return trainTestXList
Exemplo n.º 4
0
    def shuffleSplitRows(X,
                         k,
                         testSize,
                         numRows=None,
                         csarray=True,
                         rowMajor=True,
                         colProbs=None):
        """
        Take a sparse binary matrix and create k random train-test splits.

        For every sampled row, at most testSize of its elements are placed in
        the test matrix and the remaining elements of that row go to the train
        matrix. Rows that are not sampled keep all their elements in the train
        matrix. Both matrices have the shape of X and contain ones at the
        selected positions. Returns sppy.csarray objects by default.

        :param X: The sparse matrix to split; its shape, nnz and the per-row index arrays from SparseUtils.getOmegaList are used.
        :param k: The number of train-test splits to generate.
        :param testSize: The maximum number of test elements taken from each sampled row.
        :param numRows: The number of rows sampled (without replacement) to contribute test elements. If None, every row is used.
        :param csarray: If True the matrices are built with type "csarray", otherwise "scipy".
        :param rowMajor: If True the matrices use "row" storage, otherwise "col".
        :param colProbs: This is the probability of choosing the corresponding column/item. If None, we assume uniform probabilities.
        :return: A list of k tuples (trainX, testX), or (trainX, testX, rowSample) when numRows was given.
        """
        # NOTE(review): there is no self/cls parameter, so this is presumably
        # declared as a static method outside this excerpt -- confirm.
        mattype = "csarray" if csarray else "scipy"
        storagetype = "row" if rowMajor else "col"

        # "is None" instead of "== None": comparing a numpy array with "=="
        # is elementwise and the result cannot be used as a condition.
        outputRows = numRows is not None
        if not outputRows:
            numRows = X.shape[0]

        trainTestXList = []
        omegaList = SparseUtils.getOmegaList(X)
        m = X.shape[0]

        for _ in range(k):
            trainInd = 0
            testInd = 0

            # Buffers are over-allocated and trimmed to the used length below
            # (assumes the train elements never exceed X.nnz in total).
            trainRowInds = numpy.zeros(X.nnz, numpy.int32)
            trainColInds = numpy.zeros(X.nnz, numpy.int32)

            testRowInds = numpy.zeros(m * testSize, numpy.int32)
            testColInds = numpy.zeros(m * testSize, numpy.int32)

            rowSample = numpy.sort(
                numpy.random.choice(m, numRows, replace=False))
            # Boolean mask so the per-row membership test is a single lookup
            # instead of a scan over rowSample.
            isSampled = numpy.zeros(m, dtype=bool)
            isSampled[rowSample] = True

            for j in range(m):
                omega = omegaList[j]
                numElements = omega.shape[0]

                if isSampled[j]:
                    if colProbs is None:
                        inds = numpy.random.permutation(numElements)
                    else:
                        # Out-of-place division: colProbs is never modified and
                        # integer-valued weights are accepted as well.
                        probs = colProbs[omega]
                        probs = probs / probs.sum()
                        inds = numpy.random.choice(numElements,
                                                   numElements,
                                                   p=probs,
                                                   replace=False)
                    # The first testSize shuffled positions form the test part.
                    trainInds = inds[testSize:]
                    testInds = inds[0:testSize]
                else:
                    trainInds = numpy.arange(numElements)
                    # Builtin int replaces the removed numpy.int alias.
                    testInds = numpy.array([], int)

                trainEnd = trainInd + trainInds.shape[0]
                trainRowInds[trainInd:trainEnd] = j
                trainColInds[trainInd:trainEnd] = omega[trainInds]
                trainInd = trainEnd

                testEnd = testInd + testInds.shape[0]
                testRowInds[testInd:testEnd] = j
                testColInds[testInd:testEnd] = omega[testInds]
                testInd = testEnd

            trainRowInds = trainRowInds[0:trainInd]
            trainColInds = trainColInds[0:trainInd]

            testRowInds = testRowInds[0:testInd]
            testColInds = testColInds[0:testInd]

            trainX = SparseUtils.sparseMatrix(
                numpy.ones(trainRowInds.shape[0], int), trainRowInds,
                trainColInds, X.shape, mattype, storagetype)
            testX = SparseUtils.sparseMatrix(
                numpy.ones(testRowInds.shape[0], int), testRowInds,
                testColInds, X.shape, mattype, storagetype)

            if outputRows:
                trainTestXList.append((trainX, testX, rowSample))
            else:
                trainTestXList.append((trainX, testX))

        return trainTestXList