Example #1
def t3():
    out = clone_here(a[i, j])  # Move data to the current device
    rhs1 = clone_here(a[i, k])
    rhs2 = clone_here(a[j, k])

    out = update(rhs1, rhs2, out)
    copy(a[i, j], out)  # Move the result to the global array
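For context, task bodies like t3 run inside a Parla @spawn block. The sketch below is a hypothetical reconstruction of the surrounding launch loop, assuming the Parla.py API (spawn and TaskSpace from parla.tasks, gpu from parla.cuda; exact signatures may differ by version); nblocks, a, and update are placeholders carried over from the example above, and task dependencies are omitted for brevity.

from parla.cuda import gpu
from parla.tasks import spawn, TaskSpace

T = TaskSpace("T")

for i in range(nblocks):  # nblocks, a, update: assumed from context
    for j in range(i):
        for k in range(j):
            @spawn(T[3, i, j, k], placement=gpu)
            def t3():
                out = clone_here(a[i, j])   # Move data to the current device
                rhs1 = clone_here(a[i, k])
                rhs2 = clone_here(a[j, k])
                out = update(rhs1, rhs2, out)
                copy(a[i, j], out)          # Move the result to the global array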
Example #2
def t3():
    out = clone_here(a[i, j])  # Move data to the current device
    rhs1 = clone_here(a[i, k])
    rhs2 = clone_here(a[j, k])

    out -= rhs1 @ rhs2.T

    copy(a[i, j], out)  # Move the result to the global array
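Examples #1 and #2 differ only in whether the rank-update is delegated to a helper. Assuming update matches the inline form in Example #2 (an assumption, not confirmed by the source), an equivalent helper would be:

def update(rhs1, rhs2, out):
    # Hypothetical helper mirroring Example #2: subtract the block
    # product rhs1 @ rhs2.T from `out` in place and return it.
    out -= rhs1 @ rhs2.T
    return out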
Example #3
def matmul_task():
    old_device = cp.cuda.Device()
    b_block_local = clone_here(b_block)  # Move the B block to the current device
    # CuPy doesn't support the `out` argument for matmul yet, so write the
    # product into c_block via slice assignment instead of
    # cp.matmul(a_block, b_block_local.T, out=c_block):
    c_block[:] = a_block @ b_block_local.T
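The slice assignment on the last line is the key idiom: c_block[:] = ... writes the product into the existing c_block buffer, whereas plain rebinding (c_block = ...) would allocate a new local array and leave the shared block untouched. A self-contained CuPy illustration with made-up shapes:

import cupy as cp

a_block = cp.random.rand(512, 256)
b_block_local = cp.random.rand(512, 256)
c_block = cp.empty((512, 512))

# Slice assignment reuses c_block's memory; `c_block = a_block @ ...`
# would instead rebind the name to a freshly allocated array.
c_block[:] = a_block @ b_block_local.T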
Example #4
    def __getitem__(self, index: IndexType):  # -> Union[Array, List[Array]]
        """
        Read partitions and make sure they are on the current device.

        :param index: index of the target partition(s).

        .. todo::
            Multiple partitions are currently returned as a Python list of partitions (ndarrays).
        """
        if not isinstance(index, tuple):
            index = (index, )
        ret = []
        parse_index(
            self._latest_view,
            index,
            step=lambda I, i: I[i],
            stop=lambda x: ret.append(clone_here(x) if is_array(x) else x))
        if len(ret) == 1:
            if ret[0] is None:
                warn("Partition has been freed!")
            return ret[0]
        warn(
            "Multiple partitions are currently returned as a Python list of partitions (ndarrays)."
        )
        return ret
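The step/stop callbacks passed to parse_index can be read as a fold over the index tuple: step descends one level per index element and stop collects each resulting leaf. A deliberately simplified stand-in (not Parla's actual parse_index, which also expands slices and index lists into multiple leaves):

def parse_index_simplified(view, index, step, stop):
    # Descend one level per index element, then hand the leaf to `stop`.
    current = view
    for i in index:
        current = step(current, i)
    stop(current)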
Example #5
def t2():
    dblock = clone_here(a[j, j])
    dblock = cholesky(dblock)
    copy(a[j, j], dblock)
Example #6
def t1():
    out = clone_here(a[j, j])  # Move data to the current device
    rhs = clone_here(a[j, k])

    out = update(rhs, rhs, out)
    copy(a[j, j], out)  # Move the result to the global array
Example #7
def get_gpu_memory(i: int, j: int, num_gpus: int):
    dev_id = i % num_gpus
    local_id = i // num_gpus
    src = gpu_arrs[dev_id][local_id][j]
    dst = clone_here(src)  # Move the block to the current device
    return dst
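The index arithmetic here is a cyclic (round-robin) distribution: block i lives on device i % num_gpus at local slot i // num_gpus. A quick standalone check of the mapping for 8 blocks over 3 GPUs:

num_gpus = 3
for i in range(8):
    print(f"block {i} -> gpu {i % num_gpus}, local slot {i // num_gpus}")
# block 0 -> gpu 0, local slot 0
# block 1 -> gpu 1, local slot 0
# block 2 -> gpu 2, local slot 0
# block 3 -> gpu 0, local slot 1  ...and so on, wrapping around.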
Example #8
def t3():
    out = clone_here(a[i, j])
    rhs1 = clone_here(a[i, k])
    rhs2 = clone_here(a[j, k])
    out -= rhs1 @ rhs2.T
    copy(a[i, j], out)
Example #9
def t1():
    out = clone_here(a[j, j])
    rhs = clone_here(a[j, k])
    out -= rhs @ rhs.T
    copy(a[j, j], out)
Example #10
def cholesky_inplace(a):
    if a.shape[0] != a.shape[1]:
        raise ValueError("A square array is required.")
    ca = clone_here(a)  # Move data to the current device
    ca[:] = cupy.linalg.cholesky(ca)
    copy(a, ca)  # Move the result back to the global array
Example #11
def t4():
    factor = clone_here(a[j, j])
    panel = clone_here(a[i, j])
    panel = ltriang_solve(factor, panel)
    copy(a[i, j], panel)
Example #12
def t1():
    A_block_local = clone_here(A_block)  # Move the block to the current device
    Q1_blocked[i], R1[R1_lower:R1_upper] = qr_block(A_block_local)
Example #13
def t3():
    Q1_block_local = clone_here(Q1_blocked[i])
    Q2_block_local = clone_here(Q2_block)
    Q[Q_lower:Q_upper] = matmul_block(Q1_block_local, Q2_block_local)
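All of the examples follow the same data-movement pattern: clone_here pulls operands to the device the task was placed on, the kernel runs locally, and copy pushes the result back into the globally visible array. A minimal end-to-end sketch of that pattern, assuming the Parla.py top-level API (Parla context, spawn/TaskSpace, cpu placement; exact module paths may vary across versions):

import numpy as np
from parla import Parla
from parla.array import clone_here, copy
from parla.cpu import cpu
from parla.tasks import spawn, TaskSpace

def main():
    a = np.random.rand(4, 4)
    out = np.empty_like(a)
    T = TaskSpace("T")

    @spawn(T[0], placement=cpu)
    def scale_task():
        local = clone_here(a)  # Pull the operand to the task's device
        local = 2.0 * local    # Compute locally
        copy(out, local)       # Push the result back to the global array

if __name__ == "__main__":
    with Parla():
        main()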