def idd_lssolve(queue, m, n, a, krank):
    """Run triangular solves on the trailing columns of ``a``, then launch ``moveup``.

    For every column index from ``krank`` up to ``n - 1``, ``blas.trsv`` is
    called with the leading ``krank`` x ``krank`` block of ``a`` as the
    triangular matrix (``lower=False``, i.e. the upper triangle) and the first
    ``krank`` entries of that column as the vector.  Afterwards the ``moveup``
    kernel from ``id_kerns.cl`` is launched over a ``[krank, n - krank]``
    global range on ``a.data``.

    Parameters
    ----------
    queue
        Command queue passed to ``blas.trsv`` and the kernel launch; its
        context is used to build the kernel program.
    m
        Not used by this function.
    n
        Number of columns of ``a`` to consider.
    a
        Two-dimensional device array exposing ``.data``.
        NOTE(review): presumably modified in place by both ``blas.trsv`` and
        the ``moveup`` kernel -- confirm against those implementations.
    krank
        Size of the leading triangular block.

    Returns
    -------
    None
    """
    # One triangular solve per trailing column.
    for col in range(krank, n):
        blas.trsv(queue, a[:krank, :krank], a[:krank, col], lower=False)

    # The kernel program is compiled on every call from the queue's context.
    context = queue.get_info(cl.command_queue_info.CONTEXT)
    kernel_source = util.get_source('id_kerns.cl')
    program = cl.Program(context, kernel_source).build()

    # NOTE(review): moveup presumably packs the solved krank x (n - krank)
    # block to the front of the buffer, as the ID library's idd_moveup does --
    # confirm against id_kerns.cl.
    global_size = [krank, n - krank]
    program.moveup(
        queue,
        global_size,
        None,
        a.data,
        np.int32(krank),
        np.int32(n),
    )
# Zero everything above the diagonal so A is lower triangular.
A = np.tril(A)

# Allocate OpenCL memory on the device. Both matrix buffers take their
# shape and dtype from A; each vector buffer mirrors its own host vector.
clA, clA_upper, clx, clx1, clx2 = (
    Array(queue, host.shape, host.dtype) for host in (A, A, x, x1, x2)
)

# Copy the matrices and the first right-hand side to the device.
clA.set(A)
clA_upper.set(A_upper)
clx.set(x)

# Compute a triangular solve (trsv); the result is read back from clx below.
blas.trsv(queue, clA, clx)

# Check the result against NumPy's dense solver.
print("Expected: ", np.linalg.solve(A, x))
print("Actual: ", clx.get())
print()

# Try a triangular solve with the transpose.
clx1.set(x1)
blas.trsv(queue, clA, clx1, transA=True)
print("Expected: ", np.linalg.solve(A.T, x1))
print("Actual: ", clx1.get())
print()

# Try an upper triangular solve.
clx2.set(x2)