def run():
    """Compare pyHPC matrix products on distributed arrays with np.dot.

    Every operand is built twice from the same source list, once with
    ``dist=True`` and once with ``dist=False``.  The product of the
    distributed pair (``pyHPC.summa`` / ``pyHPC.matmul``) must equal
    ``np.dot`` of the non-distributed pair according to
    ``dnumpytest.array_equal``.

    The test is skipped (a warning is written and the function returns)
    when ``np.SPMD_MODE`` is false or when ``pyHPC`` cannot be imported.

    Raises:
        Exception: if any distributed product differs from the reference.
    """
    # sys.stdout.write emits exactly the text that the Python 2 statement
    # ``print text,`` emitted (the messages already end in a newline) and
    # is valid on both Python 2 and Python 3.
    import sys

    if not np.SPMD_MODE:
        sys.stdout.write("[rank %d] Warning - ignored in non-SPMD mode\n" % (np.RANK))
        return
    try:  # This test requires the pyHPC module
        import pyHPC
    except ImportError:
        sys.stdout.write("[rank %d] Warning - ignored pyHPC not found\n" % (np.RANK))
        return

    # Non-view test - identical to the one in test_dot.py
    niter = 6
    for m in range(2, niter + 2):
        for n in range(2, niter + 2):
            for k in range(2, niter + 2):
                Asrc = dnumpytest.random_list([k, m])
                Bsrc = dnumpytest.random_list([n, k])
                Ad = np.array(Asrc, dtype=float, dist=True)
                Af = np.array(Asrc, dtype=float, dist=False)
                Bd = np.array(Bsrc, dtype=float, dist=True)
                Bf = np.array(Bsrc, dtype=float, dist=False)
                Cd = pyHPC.summa(Ad, Bd)
                Cf = np.dot(Af, Bf)
                if not dnumpytest.array_equal(Cd, Cf):
                    raise Exception("Uncorrect result matrix\n")

    # View tests on square operands twice as large as above.
    niter *= 2
    Asrc = dnumpytest.random_list([niter, niter])
    Bsrc = dnumpytest.random_list([niter, niter])
    Ad = np.array(Asrc, dtype=float, dist=True)
    Af = np.array(Asrc, dtype=float, dist=False)
    Bd = np.array(Bsrc, dtype=float, dist=True)
    Bf = np.array(Bsrc, dtype=float, dist=False)
    Cd = np.zeros((niter, niter), dtype=float, dist=True)

    # NOTE(review): BS was never bound inside this function.  It is bound
    # here to np.BLOCKSIZE on the assumption that the views are meant to
    # start/end on distributed block boundaries - confirm.
    BS = np.BLOCKSIZE

    # Views that drop leading rows/columns.
    for m in range(0, niter - BS, BS):
        for n in range(0, niter - BS, BS):
            for k in range(0, niter - BS, BS):
                tAd = Ad[m:, k:]
                tAf = Af[m:, k:]
                tBd = Bd[k:, n:]
                tBf = Bf[k:, n:]
                # NOTE(review): this view of Cd is rebound on the next
                # line; kept in case constructing it is part of what the
                # test exercises - confirm.
                tCd = Cd[m:, n:]
                tCd = pyHPC.matmul(tAd, tBd)
                tCf = np.dot(tAf, tBf)
                if not dnumpytest.array_equal(tCd, tCf):
                    raise Exception("Uncorrect result matrix\n")

    # Views that drop trailing rows/columns.
    for m in range(BS, niter + BS, BS):
        for n in range(BS, niter + BS, BS):
            for k in range(BS, niter + BS, BS):
                tAd = Ad[:m, :k]
                tAf = Af[:m, :k]
                tBd = Bd[:k, :n]
                tBf = Bf[:k, :n]
                # NOTE(review): rebound on the next line, see above.
                tCd = Cd[:m, :n]
                tCd = pyHPC.matmul(tAd, tBd)
                tCf = np.dot(tAf, tBf)
                if not dnumpytest.array_equal(tCd, tCf):
                    raise Exception("Uncorrect result matrix\n")
def run():
    """Compare pyHPC matrix products on distributed arrays with np.dot.

    Every operand is built twice from the same source list, once with
    ``dist=True`` and once with ``dist=False``.  The product of the
    distributed pair (``pyHPC.summa`` / ``pyHPC.matmul``) must equal
    ``np.dot`` of the non-distributed pair according to
    ``dnumpytest.array_equal``.

    The test is skipped (a warning is written and the function returns)
    when ``np.SPMD_MODE`` is false or when ``pyHPC`` cannot be imported.

    Raises:
        Exception: if any distributed product differs from the reference.
    """
    # sys.stdout.write emits exactly the text that the Python 2 statement
    # ``print text,`` emitted (the messages already end in a newline) and
    # is valid on both Python 2 and Python 3.
    import sys

    if not np.SPMD_MODE:
        sys.stdout.write("[rank %d] Warning - ignored in non-SPMD mode\n" % (np.RANK))
        return
    try:  # This test requires the pyHPC module
        import pyHPC
    except ImportError:
        sys.stdout.write("[rank %d] Warning - ignored pyHPC not found\n" % (np.RANK))
        return

    # Non-view test - identical to the one in test_dot.py
    niter = 6
    for m in range(2, niter + 2):
        for n in range(2, niter + 2):
            for k in range(2, niter + 2):
                Asrc = dnumpytest.random_list([k, m])
                Bsrc = dnumpytest.random_list([n, k])
                Ad = np.array(Asrc, dtype=float, dist=True)
                Af = np.array(Asrc, dtype=float, dist=False)
                Bd = np.array(Bsrc, dtype=float, dist=True)
                Bf = np.array(Bsrc, dtype=float, dist=False)
                Cd = pyHPC.summa(Ad, Bd)
                Cf = np.dot(Af, Bf)
                if not dnumpytest.array_equal(Cd, Cf):
                    raise Exception("Uncorrect result matrix\n")

    # View tests on square operands twice as large as above.
    niter *= 2
    Asrc = dnumpytest.random_list([niter, niter])
    Bsrc = dnumpytest.random_list([niter, niter])
    Ad = np.array(Asrc, dtype=float, dist=True)
    Af = np.array(Asrc, dtype=float, dist=False)
    Bd = np.array(Bsrc, dtype=float, dist=True)
    Bf = np.array(Bsrc, dtype=float, dist=False)
    Cd = np.zeros((niter, niter), dtype=float, dist=True)

    # NOTE(review): BS was never bound inside this function.  It is bound
    # here to np.BLOCKSIZE on the assumption that the views are meant to
    # start/end on distributed block boundaries - confirm.
    BS = np.BLOCKSIZE

    # Views that drop leading rows/columns.
    for m in range(0, niter - BS, BS):
        for n in range(0, niter - BS, BS):
            for k in range(0, niter - BS, BS):
                tAd = Ad[m:, k:]
                tAf = Af[m:, k:]
                tBd = Bd[k:, n:]
                tBf = Bf[k:, n:]
                # NOTE(review): this view of Cd is rebound on the next
                # line; kept in case constructing it is part of what the
                # test exercises - confirm.
                tCd = Cd[m:, n:]
                tCd = pyHPC.matmul(tAd, tBd)
                tCf = np.dot(tAf, tBf)
                if not dnumpytest.array_equal(tCd, tCf):
                    raise Exception("Uncorrect result matrix\n")

    # Views that drop trailing rows/columns.
    for m in range(BS, niter + BS, BS):
        for n in range(BS, niter + BS, BS):
            for k in range(BS, niter + BS, BS):
                tAd = Ad[:m, :k]
                tAf = Af[:m, :k]
                tBd = Bd[:k, :n]
                tBf = Bf[:k, :n]
                # NOTE(review): rebound on the next line, see above.
                tCd = Cd[:m, :n]
                tCd = pyHPC.matmul(tAd, tBd)
                tCf = np.dot(tAf, tBf)
                if not dnumpytest.array_equal(tCd, tCf):
                    raise Exception("Uncorrect result matrix\n")
def lu(matrix):
    """
    Compute the LU decomposition of a distributed matrix, without pivoting.

    Parameters
    ----------
    matrix : array, shape (M, M)
        Distributed array to decompose.  M must be a multiple of
        np.BLOCKSIZE.  The input is not modified; the work is done on a
        copy.

    Returns
    -------
    (L, U) : tuple of distributed arrays, each of shape (M, M)
        The lower and upper factors.  No permutation matrix is returned.

    Raises
    ------
    Exception
        If the matrix is not square, is not distributed, has a size that
        is not divisible by np.BLOCKSIZE, or if the LU of a diagonal block
        reports a permutation whose diagonal is not all ones (pivoting is
        not supported).
    """
    SIZE = matrix.shape[0]

    # Compare rows against columns; comparing shape[0] with itself could
    # never reject a non-square input.
    if matrix.shape[0] != matrix.shape[1]:
        raise Exception("LU only supports squared matricis")
    if not matrix.dist():
        raise Exception("The matrix is not distributed")

    # The algorithm walks the matrix one distributed block at a time.
    BS = np.BLOCKSIZE
    if SIZE % BS != 0:
        raise Exception("The matrix dimensions must be divisible "
                        "with np.BLOCKSIZE(%d)" % BS)

    # NOTE(review): prow/pcol are not used below; the call is kept as-is.
    (prow, pcol) = matrix.pgrid()

    # Work on a distributed copy so the caller's matrix stays untouched.
    A = np.zeros((SIZE, SIZE), dtype=matrix.dtype, dist=True)
    A += matrix
    L = np.zeros((SIZE, SIZE), dtype=matrix.dtype, dist=True)
    U = np.zeros((SIZE, SIZE), dtype=matrix.dtype, dist=True)

    nblocks = SIZE // BS  # Number of blocks along each dimension
    for k in range(0, SIZE, BS):
        bs = min(BS, SIZE - k)  # Current block size
        kb = k // BS  # k as block index

        # Factorize the diagonal block (kb, kb)
        diag_blocks = ((kb, kb + 1), (kb, kb + 1))
        for a_blk, l_blk, u_blk in zip(A.blocks(diag_blocks),
                                       L.blocks(diag_blocks),
                                       U.blocks(diag_blocks)):
            (p, tl, tu) = linalg.lu(a_blk)
            if not (np.diag(p) == 1).all():  # We do not support pivoting
                raise Exception("Pivoting was needed!")
            # There seems to be a transpose bug in SciPy's LU
            l_blk[:] = tl.T
            u_blk[:] = tu.T

        # Replicate diagonal block horizontal and vertical
        for tk in range(k + bs, SIZE, BS):
            tbs = min(BS, SIZE - tk)  # Current block size
            L[tk:tk + tbs, k:k + bs] = U[k:k + tbs, k:k + bs]
            U[k:k + bs, tk:tk + tbs] = L[k:k + bs, k:k + tbs]

        if k + bs < SIZE:
            # Compute horizontal multiplier
            row_blocks = ((kb, kb + 1), (kb + 1, nblocks))
            for a_blk, u_blk in zip(A.blocks(row_blocks),
                                    U.blocks(row_blocks)):
                u_blk[:] = np.linalg.solve(u_blk.T, a_blk.T).T

            # Compute vertical multiplier
            col_blocks = ((kb + 1, nblocks), (kb, kb + 1))
            for a_blk, l_blk in zip(A.blocks(col_blocks),
                                    L.blocks(col_blocks)):
                l_blk[:] = np.linalg.solve(l_blk, a_blk)

            # Apply to remaining submatrix
            A -= pyHPC.summa(L[:, :k + bs],
                             U[:k + bs, :],
                             ao=(k + bs, k),
                             bo=(k, k + bs),
                             co=(k + bs, k + bs))

    return (L, U)
def lu(matrix):
    """
    Compute the LU decomposition of a distributed matrix, without pivoting.

    Parameters
    ----------
    matrix : array, shape (M, M)
        Distributed array to decompose.  M must be a multiple of
        np.BLOCKSIZE.  The input is not modified; the work is done on a
        copy.

    Returns
    -------
    (L, U) : tuple of distributed arrays, each of shape (M, M)
        The lower and upper factors.  No permutation matrix is returned.

    Raises
    ------
    Exception
        If the matrix is not square, is not distributed, has a size that
        is not divisible by np.BLOCKSIZE, or if the LU of a diagonal block
        reports a permutation whose diagonal is not all ones (pivoting is
        not supported).
    """
    SIZE = matrix.shape[0]

    # Compare rows against columns; comparing shape[0] with itself could
    # never reject a non-square input.
    if matrix.shape[0] != matrix.shape[1]:
        raise Exception("LU only supports squared matricis")
    if not matrix.dist():
        raise Exception("The matrix is not distributed")

    # The algorithm walks the matrix one distributed block at a time.
    BS = np.BLOCKSIZE
    if SIZE % BS != 0:
        raise Exception("The matrix dimensions must be divisible " "with np.BLOCKSIZE(%d)" % BS)

    # NOTE(review): prow/pcol are not used below; the call is kept as-is.
    (prow, pcol) = matrix.pgrid()

    # Work on a distributed copy so the caller's matrix stays untouched.
    A = np.zeros((SIZE, SIZE), dtype=matrix.dtype, dist=True)
    A += matrix
    L = np.zeros((SIZE, SIZE), dtype=matrix.dtype, dist=True)
    U = np.zeros((SIZE, SIZE), dtype=matrix.dtype, dist=True)

    nblocks = SIZE // BS  # Number of blocks along each dimension
    for k in range(0, SIZE, BS):
        bs = min(BS, SIZE - k)  # Current block size
        kb = k // BS  # k as block index

        # Factorize the diagonal block (kb, kb)
        diag_blocks = ((kb, kb + 1), (kb, kb + 1))
        for a_blk, l_blk, u_blk in zip(A.blocks(diag_blocks), L.blocks(diag_blocks), U.blocks(diag_blocks)):
            (p, tl, tu) = linalg.lu(a_blk)
            if not (np.diag(p) == 1).all():  # We do not support pivoting
                raise Exception("Pivoting was needed!")
            # There seems to be a transpose bug in SciPy's LU
            l_blk[:] = tl.T
            u_blk[:] = tu.T

        # Replicate diagonal block horizontal and vertical
        for tk in range(k + bs, SIZE, BS):
            tbs = min(BS, SIZE - tk)  # Current block size
            L[tk : tk + tbs, k : k + bs] = U[k : k + tbs, k : k + bs]
            U[k : k + bs, tk : tk + tbs] = L[k : k + bs, k : k + tbs]

        if k + bs < SIZE:
            # Compute horizontal multiplier
            row_blocks = ((kb, kb + 1), (kb + 1, nblocks))
            for a_blk, u_blk in zip(A.blocks(row_blocks), U.blocks(row_blocks)):
                u_blk[:] = np.linalg.solve(u_blk.T, a_blk.T).T

            # Compute vertical multiplier
            col_blocks = ((kb + 1, nblocks), (kb, kb + 1))
            for a_blk, l_blk in zip(A.blocks(col_blocks), L.blocks(col_blocks)):
                l_blk[:] = np.linalg.solve(l_blk, a_blk)

            # Apply to remaining submatrix
            A -= pyHPC.summa(L[:, : k + bs], U[: k + bs, :], ao=(k + bs, k), bo=(k, k + bs), co=(k + bs, k + bs))

    return (L, U)