Пример #1
0
class MatrixUDTTests(MLlibTestCase):
    """Tests for MatrixUDT: JSON schema round-trip, serialization round-trip
    and schema inference when matrices are stored in a DataFrame."""

    # Fixtures shared by all tests: dense and sparse matrices, each in plain
    # and isTransposed=True form, plus the UDT instance under test.
    dm1 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10])
    dm2 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10], isTransposed=True)
    sm1 = SparseMatrix(1, 1, [0, 1], [0], [2.0])
    sm2 = SparseMatrix(2, 1, [0, 0, 1], [0], [5.0], isTransposed=True)
    udt = MatrixUDT()

    def test_json_schema(self):
        """The UDT rebuilt from its own JSON value equals the original UDT."""
        self.assertEqual(MatrixUDT.fromJson(self.udt.jsonValue()), self.udt)

    def test_serialization(self):
        """Each fixture matrix survives a serialize/deserialize round-trip."""
        for m in [self.dm1, self.dm2, self.sm1, self.sm2]:
            self.assertEqual(m, self.udt.deserialize(self.udt.serialize(m)))

    def test_infer_schema(self):
        """A DataFrame built from (label, matrix) tuples infers MatrixUDT for
        the matrix column and returns matrices equal to the inputs."""
        rdd = self.sc.parallelize([("dense", self.dm1), ("sparse", self.sm1)])
        df = rdd.toDF()
        schema = df.schema
        # assertEqual, not assertTrue: assertTrue(a, b) uses b only as the
        # failure message, so it would pass for any truthy first argument.
        self.assertEqual(schema.fields[1].dataType, self.udt)
        # `_2` selects the matrix element of each (label, matrix) row.
        matrices = df.rdd.map(lambda x: x._2).collect()
        self.assertEqual(len(matrices), 2)
        for m in matrices:
            if isinstance(m, DenseMatrix):
                self.assertEqual(m, self.dm1)
            elif isinstance(m, SparseMatrix):
                self.assertEqual(m, self.sm1)
            else:
                raise ValueError("Expected a matrix but got type %r" % type(m))
Пример #2
0
    def test_repr_sparse_matrix(self):
        """Check repr() and str() of SparseMatrix, including the abbreviated
        ("...") form expected for long index, pointer and value arrays."""
        sm1t = SparseMatrix(
            3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0],
            isTransposed=True)
        # assertEqual is required here: assertTrue(x, msg) interprets the
        # second argument as a failure message and passes for any non-empty
        # string, so it never compared the representation at all.
        self.assertEqual(
            repr(sm1t),
            'SparseMatrix(3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], True)')

        indices = tile(arange(6), 3)
        values = ones(18)
        sm = SparseMatrix(6, 3, [0, 6, 12, 18], indices, values)
        # Expected strings are built from adjacent literals so that no
        # source indentation leaks into them (a backslash continuation inside
        # a string literal keeps the next line's leading whitespace).
        self.assertEqual(
            repr(sm),
            "SparseMatrix(6, 3, [0, 6, 12, 18], "
            "[0, 1, 2, 3, 4, 5, 0, 1, ..., 4, 5, 0, 1, 2, 3, 4, 5], "
            "[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..., "
            "1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], False)")

        self.assertEqual(
            str(sm),
            "6 X 3 CSCMatrix\n"
            "(0,0) 1.0\n(1,0) 1.0\n(2,0) 1.0\n(3,0) 1.0\n(4,0) 1.0\n(5,0) 1.0\n"
            "(0,1) 1.0\n(1,1) 1.0\n(2,1) 1.0\n(3,1) 1.0\n(4,1) 1.0\n(5,1) 1.0\n"
            "(0,2) 1.0\n(1,2) 1.0\n(2,2) 1.0\n(3,2) 1.0\n..\n..")

        # A matrix with no stored entries but many column pointers.
        sm = SparseMatrix(1, 18, zeros(19), [], [])
        self.assertEqual(
            repr(sm),
            'SparseMatrix(1, 18, '
            '[0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0], [], [], False)')
Пример #3
0
 def difun(x, vect):
     """Build the p-by-p sparse block for the block coordinates ``x``.

     When ``x[0] == x[1]`` the block is a SparseMatrix with column pointers
     ``linspace(0, p, p + 1)``, row indices ``linspace(0, p - 1, p)`` and
     values taken from ``vect[x[0] * p:(x[0] + 1) * p]``.  Otherwise the
     block is built from the indptr/indices/data of
     ``sparse.csc_matrix((p, p))``.

     Returns the tuple ``(x, matrix)``.  ``p`` is read from the enclosing
     scope.
     """
     if x[0] == x[1]:
         col_ptrs = np.linspace(0, p, num=(p + 1))
         row_idx = np.linspace(0, p - 1, num=p)
         block_values = vect[(x[0] * p):((x[0] + 1) * p)]
         return (x, SparseMatrix(p, p, col_ptrs, row_idx, block_values))

     blank = sparse.csc_matrix((p, p))
     off_diag = Matrices.sparse(p, p, blank.indptr, blank.indices, blank.data)
     return (x, off_diag)
Пример #4
0
 def test_ml_mllib_matrix_conversion(self):
     """Verify matrix conversion between the mllib and ml linalg packages.

     ``asML()`` is checked for mllib -> ml and ``Matrices.fromML()`` for
     ml -> mllib, each for dense and sparse matrices in both plain and
     isTransposed=True form.
     """
     # ---- mllib -> ml ----
     # dense
     src_dense = Matrices.dense(2, 2, [0, 1, 2, 3])
     want_dense = newlinalg.Matrices.dense(2, 2, [0, 1, 2, 3])
     self.assertEqual(src_dense.asML(), want_dense)

     # dense, transposed
     src_dense_t = DenseMatrix(2, 2, [0, 1, 2, 3], True)
     want_dense_t = newlinalg.DenseMatrix(2, 2, [0, 1, 2, 3], True)
     self.assertEqual(src_dense_t.asML(), want_dense_t)

     # sparse
     src_sparse = Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
     want_sparse = newlinalg.Matrices.sparse(
         2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
     self.assertEqual(src_sparse.asML(), want_sparse)

     # sparse, transposed
     src_sparse_t = SparseMatrix(
         2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
     want_sparse_t = newlinalg.SparseMatrix(
         2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
     self.assertEqual(src_sparse_t.asML(), want_sparse_t)

     # ---- ml -> mllib ----
     # dense
     want_dense = Matrices.dense(2, 2, [1, 2, 3, 4])
     src_dense = newlinalg.Matrices.dense(2, 2, [1, 2, 3, 4])
     self.assertEqual(want_dense, Matrices.fromML(src_dense))

     # dense, transposed
     want_dense_t = DenseMatrix(2, 2, [1, 2, 3, 4], True)
     src_dense_t = newlinalg.DenseMatrix(2, 2, [1, 2, 3, 4], True)
     self.assertEqual(want_dense_t, Matrices.fromML(src_dense_t))

     # sparse
     want_sparse = Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
     src_sparse = newlinalg.Matrices.sparse(
         2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
     self.assertEqual(want_sparse, Matrices.fromML(src_sparse))

     # sparse, transposed
     want_sparse_t = SparseMatrix(
         2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
     src_sparse_t = newlinalg.SparseMatrix(
         2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
     self.assertEqual(want_sparse_t, Matrices.fromML(src_sparse_t))
Пример #5
0
 def test_serialize(self):
     """Run ``self._test_serialize`` on a set of vectors and matrices.

     Covers DenseVector built from a range, a numpy array and a typed
     ``array``; SparseVector with and without entries; a DenseMatrix and a
     SparseMatrix.
     """
     def samples():
         # Lazily yield each fixture so that it is constructed immediately
         # before it is passed to the serialization check.
         yield DenseVector(range(10))
         yield DenseVector(array([1.0, 2.0, 3.0, 4.0]))
         yield DenseVector(pyarray.array("d", range(10)))
         yield SparseVector(4, {1: 1, 3: 2})
         yield SparseVector(3, {})
         yield DenseMatrix(2, 3, range(6))
         yield SparseMatrix(
             3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0])

     for sample in samples():
         self._test_serialize(sample)
Пример #6
0
 def test_ml_mllib_matrix_conversion(self):
     """Check that matrices convert correctly between mllib and ml linalg.

     The first half converts mllib matrices with ``asML()``; the second half
     converts ml matrices with ``Matrices.fromML()``.  Dense and sparse
     matrices are each tested with and without the transposed flag.
     """
     def expect_as_ml(mllib_matrix, ml_expected):
         # Converted value goes first, matching assertEqual(actual, expected).
         self.assertEqual(mllib_matrix.asML(), ml_expected)

     def expect_from_ml(mllib_expected, ml_matrix):
         self.assertEqual(mllib_expected, Matrices.fromML(ml_matrix))

     # to ml: dense
     expect_as_ml(
         Matrices.dense(2, 2, [0, 1, 2, 3]),
         newlinalg.Matrices.dense(2, 2, [0, 1, 2, 3]))
     # to ml: dense, transposed
     expect_as_ml(
         DenseMatrix(2, 2, [0, 1, 2, 3], True),
         newlinalg.DenseMatrix(2, 2, [0, 1, 2, 3], True))
     # to ml: sparse
     expect_as_ml(
         Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]),
         newlinalg.Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]))
     # to ml: sparse, transposed
     expect_as_ml(
         SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True),
         newlinalg.SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True))

     # from ml: dense
     expect_from_ml(
         Matrices.dense(2, 2, [1, 2, 3, 4]),
         newlinalg.Matrices.dense(2, 2, [1, 2, 3, 4]))
     # from ml: dense, transposed
     expect_from_ml(
         DenseMatrix(2, 2, [1, 2, 3, 4], True),
         newlinalg.DenseMatrix(2, 2, [1, 2, 3, 4], True))
     # from ml: sparse
     expect_from_ml(
         Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]),
         newlinalg.Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]))
     # from ml: sparse, transposed
     expect_from_ml(
         SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True),
         newlinalg.SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True))
Пример #7
0
def difun(x, vect):
    """Return ``(x, block)`` for the square block at block coordinates ``x``.

    The block has SQUARE_BLOCK_SIZE rows and columns.  If ``x[0] == x[1]``
    it is a SparseMatrix whose column pointers and row indices come from
    ``np.linspace`` and whose values are the slice of ``vect`` belonging to
    block ``x[0]``.  Otherwise it is built from the indptr/indices/data of
    ``sparse.csc_matrix`` created with only a shape.
    """
    size = SQUARE_BLOCK_SIZE
    if x[0] == x[1]:
        col_ptrs = np.linspace(0, size, num=(size + 1))
        row_idx = np.linspace(0, size - 1, num=size)
        block_values = vect[(x[0] * size):((x[0] + 1) * size)]
        return (x, SparseMatrix(size, size, col_ptrs, row_idx, block_values))

    blank = sparse.csc_matrix((size, size))
    block = Matrices.sparse(
        size, size, blank.indptr, blank.indices, blank.data)
    return (x, block)
Пример #8
0
    def test_sparse_matrix(self):
        """Exercise SparseMatrix construction, repr, element indexing,
        toArray, out-of-range indexing and dense/sparse round-tripping, for
        both a plain and an isTransposed=True matrix."""
        # Test sparse matrix creation.
        sm1 = SparseMatrix(
            3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0])
        self.assertEqual(sm1.numRows, 3)
        self.assertEqual(sm1.numCols, 4)
        self.assertEqual(sm1.colPtrs.tolist(), [0, 2, 2, 4, 4])
        self.assertEqual(sm1.rowIndices.tolist(), [1, 2, 1, 2])
        self.assertEqual(sm1.values.tolist(), [1.0, 2.0, 4.0, 5.0])
        # assertEqual, not assertTrue: assertTrue(x, msg) would use the
        # expected string as a failure message and never compare it.
        self.assertEqual(
            repr(sm1),
            'SparseMatrix(3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0], False)')

        # Test indexing
        expected = [
            [0, 0, 0, 0],
            [1, 0, 4, 0],
            [2, 0, 5, 0]]

        for i in range(3):
            for j in range(4):
                self.assertEqual(expected[i][j], sm1[i, j])
        self.assertTrue(array_equal(sm1.toArray(), expected))

        # Out-of-range (row, col) pairs must raise IndexError.
        for i, j in [(-1, 1), (4, 3), (3, 5)]:
            self.assertRaises(IndexError, sm1.__getitem__, (i, j))

        # Test conversion to dense and sparse.
        smnew = sm1.toDense().toSparse()
        self.assertEqual(sm1.numRows, smnew.numRows)
        self.assertEqual(sm1.numCols, smnew.numCols)
        self.assertTrue(array_equal(sm1.colPtrs, smnew.colPtrs))
        self.assertTrue(array_equal(sm1.rowIndices, smnew.rowIndices))
        self.assertTrue(array_equal(sm1.values, smnew.values))

        # Same checks for a matrix constructed with isTransposed=True.
        sm1t = SparseMatrix(
            3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0],
            isTransposed=True)
        self.assertEqual(sm1t.numRows, 3)
        self.assertEqual(sm1t.numCols, 4)
        self.assertEqual(sm1t.colPtrs.tolist(), [0, 2, 3, 5])
        self.assertEqual(sm1t.rowIndices.tolist(), [0, 1, 2, 0, 2])
        self.assertEqual(sm1t.values.tolist(), [3.0, 2.0, 4.0, 9.0, 8.0])

        expected = [
            [3, 2, 0, 0],
            [0, 0, 4, 0],
            [9, 0, 8, 0]]

        for i in range(3):
            for j in range(4):
                self.assertEqual(expected[i][j], sm1t[i, j])
        self.assertTrue(array_equal(sm1t.toArray(), expected))
Пример #9
0
 def difun(self, x, vect):
     """Return ``(x, block)`` for the square block at block coordinates ``x``.

     The block side length is taken from ``self.squareBlockSize``.  If
     ``x[0] == x[1]`` the block is a SparseMatrix whose column pointers and
     row indices come from ``np.linspace`` and whose values are the slice of
     ``vect`` belonging to block ``x[0]``.  Otherwise it is built from the
     indptr/indices/data of ``sparse.csc_matrix`` created with only a shape.
     """
     # NOTE(review): squareBlockSize is used as an integer-like size below,
     # so this deepcopy looks unnecessary -- confirm before removing.
     size = copy.deepcopy(self.squareBlockSize)
     if x[0] == x[1]:
         col_ptrs = np.linspace(0, size, num=(size + 1))
         row_idx = np.linspace(0, size - 1, num=size)
         block_values = vect[(x[0] * size):((x[0] + 1) * size)]
         return (x, SparseMatrix(size, size, col_ptrs, row_idx, block_values))

     blank = sparse.csc_matrix((size, size))
     block = Matrices.sparse(
         size, size, blank.indptr, blank.indices, blank.data)
     return (x, block)
Пример #10
0
 def test_eq(self):
     """Check ``==`` across dense/sparse vectors and dense/sparse matrices.

     Vectors holding the same entries compare equal regardless of
     representation; vectors differing in size or in a value do not.  A
     DenseMatrix and a SparseMatrix are compared in both operand orders.
     """
     dense_ref = DenseVector([0.0, 1.0, 0.0, 5.5])
     sparse_same = SparseVector(4, [(1, 1.0), (3, 5.5)])
     dense_same = DenseVector([0.0, 1.0, 0.0, 5.5])
     sparse_longer = SparseVector(6, [(1, 1.0), (3, 5.5)])
     dense_other = DenseVector([0.0, 1.0, 0.0, 2.5])
     sparse_other = SparseVector(4, [(1, 1.0), (3, 2.5)])
     dense_mat = DenseMatrix(2, 2, [2, 0, 0, 0])
     sparse_mat = SparseMatrix(2, 2, [0, 2, 3], [0], [2])

     # Equal contents, different or identical representation.
     self.assertEqual(dense_ref, sparse_same)
     self.assertEqual(dense_ref, dense_same)

     # Different size or different stored value.
     self.assertFalse(sparse_same == sparse_longer)
     self.assertFalse(dense_ref == dense_other)
     self.assertFalse(dense_ref == sparse_other)

     # Both operand orders are checked because dense and sparse matrices
     # can be semantically equal while each implements its own __eq__.
     self.assertEqual(dense_mat, sparse_mat)
     self.assertEqual(sparse_mat, dense_mat)