def common_impl(a, out, dpnp_func, print_debug):
    """Run ``dpnp_func`` on a shared-memory copy of ``a`` and fill ``out``.

    The input is copied into a buffer obtained from
    ``dpctl_functions.malloc_shared``, ``dpnp_func`` is invoked on it, and
    ``out.size * out.itemsize`` bytes of the result buffer are copied back
    into ``out`` before both buffers are released.

    Args:
        a: Input array; must contain at least one element.
        out: Pre-allocated array that receives the result bytes.
        dpnp_func: Callable invoked as
            ``dpnp_func(out_ptr, a_ptr, a.shapeptr, a.ndim, 0, 0, 0, 0)``.
        print_debug: When truthy, print ``"dpnp implementation"`` at the end.

    Raises:
        ValueError: If ``a`` is empty.
    """
    if a.size == 0:
        raise ValueError("Passed Empty array")

    sycl_queue = dpctl_functions.get_current_queue()

    a_nbytes = a.size * a.itemsize
    a_usm = dpctl_functions.malloc_shared(a_nbytes, sycl_queue)
    event = dpctl_functions.queue_memcpy(sycl_queue, a_usm, a.ctypes, a_nbytes)
    dpctl_functions.event_wait(event)
    dpctl_functions.event_delete(event)

    # The copy-back below reads out.size * out.itemsize bytes, so the result
    # buffer must be at least that large.  The original a.itemsize is kept as
    # a lower bound so the allocation never shrinks relative to before.
    out_nbytes = out.size * out.itemsize
    out_usm = dpctl_functions.malloc_shared(
        max(a.itemsize, out_nbytes), sycl_queue
    )

    # These four arguments are always passed as 0 by this wrapper.
    axes, axes_ndim = 0, 0
    initial = 0
    where = 0

    dpnp_func(
        out_usm, a_usm, a.shapeptr, a.ndim, axes, axes_ndim, initial, where
    )

    event = dpctl_functions.queue_memcpy(
        sycl_queue, out.ctypes, out_usm, out_nbytes
    )
    dpctl_functions.event_wait(event)
    dpctl_functions.event_delete(event)

    dpctl_functions.free_with_queue(a_usm, sycl_queue)
    dpctl_functions.free_with_queue(out_usm, sycl_queue)

    # NOTE(review): presumably keeps `a` and `out` alive until the copies
    # above have completed -- confirm against dpnp_ext.
    dpnp_ext._dummy_liveness_func([a.size, out.size])

    if print_debug:
        print("dpnp implementation")
def common_impl_multinomial(n, pvals, res, dpnp_func, print_debug):
    """Fill ``res`` by calling ``dpnp_func`` with ``n`` and a copy of ``pvals``.

    Args:
        n: Forwarded unchanged as the second argument of ``dpnp_func``.
        pvals: Array copied into a shared-memory buffer before the call.
        res: Pre-allocated array that receives the result.
        dpnp_func: Callable invoked as
            ``dpnp_func(res_ptr, n, pvals_ptr, pvals.size, res.size)``.
        print_debug: When truthy, print ``"dpnp implementation"`` at the end.
    """
    queue = dpctl_functions.get_current_queue()

    res_nbytes = res.size * res.itemsize
    pvals_nbytes = pvals.size * pvals.itemsize

    res_buf = dpctl_functions.malloc_shared(res_nbytes, queue)
    pvals_buf = dpctl_functions.malloc_shared(pvals_nbytes, queue)

    # Stage the input values and wait for the copy to finish.
    copy_in = dpctl_functions.queue_memcpy(
        queue, pvals_buf, pvals.ctypes, pvals_nbytes
    )
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    dpnp_func(res_buf, n, pvals_buf, pvals.size, res.size)

    # Bring the result back into the caller's array.
    copy_out = dpctl_functions.queue_memcpy(
        queue, res.ctypes, res_buf, res_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(res_buf, queue)
    dpctl_functions.free_with_queue(pvals_buf, queue)

    dpnp_ext._dummy_liveness_func([res.size])

    if print_debug:
        print("dpnp implementation")
def dpnp_impl(a):
    """Return the single value ``dpnp_func`` computes for ``a``.

    ``dpnp_func``, ``res_dtype`` and ``PRINT_DEBUG`` are taken from the
    enclosing scope (not visible in this block).

    Args:
        a: Input array; must contain at least one element.

    Returns:
        Element 0 of a one-element ``res_dtype`` array filled by ``dpnp_func``.

    Raises:
        ValueError: If ``a`` is empty.
    """
    if a.size == 0:
        raise ValueError("Passed Empty array")

    queue = dpctl_functions.get_current_queue()

    in_nbytes = a.size * a.itemsize
    in_buf = dpctl_functions.malloc_shared(in_nbytes, queue)
    copy_in = dpctl_functions.queue_memcpy(queue, in_buf, a.ctypes, in_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    # One-element host array that receives the result.
    out = np.empty(1, dtype=res_dtype)
    out_buf = dpctl_functions.malloc_shared(out.itemsize, queue)

    dpnp_func(in_buf, out_buf, a.size)

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out.size * out.itemsize
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([a.size, out.size])

    if PRINT_DEBUG:
        print("dpnp implementation")

    return out[0]
def dpnp_impl(a):
    """Apply ``dpnp_func`` to a square matrix and return a same-shaped result.

    ``dpnp_func`` and ``PRINT_DEBUG`` come from the enclosing scope (not
    visible in this block).

    Args:
        a: Array whose last two dimensions must be equal.

    Returns:
        A new array obtained from ``a.copy()`` and overwritten with the
        output of ``dpnp_func``; for a zero-sized trailing dimension the
        plain copy is returned without calling ``dpnp_func``.

    Raises:
        ValueError: If the last two dimensions of ``a`` differ.
    """
    order = a.shape[-1]
    if a.shape[-2] != order:
        raise ValueError("Input array must be square.")

    out = a.copy()
    if order == 0:
        # Nothing to compute for an empty matrix.
        return out

    queue = dpctl_functions.get_current_queue()

    in_nbytes = a.size * a.itemsize
    in_buf = dpctl_functions.malloc_shared(in_nbytes, queue)
    copy_in = dpctl_functions.queue_memcpy(queue, in_buf, a.ctypes, in_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    out_nbytes = out.size * out.itemsize
    out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

    dpnp_func(in_buf, out_buf, a.shapeptr)

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([out.size, a.size])

    if PRINT_DEBUG:
        print("dpnp implementation")

    return out
def dpnp_impl(a, offset=0):
    """Call ``dpnp_func`` on ``a`` with ``offset`` and return the new array.

    The output shape is built from ``a.shape[2:]`` followed by one extra
    trailing length derived from ``a.shape[0]``, ``a.shape[1]`` and
    ``offset``.  ``dpnp_func``, ``tuplizer`` and ``PRINT_DEBUG`` come from
    the enclosing scope (not visible in this block).

    Args:
        a: Input array with at least two dimensions; must be non-empty.
        offset: Integer forwarded to ``dpnp_func`` and used when sizing the
            trailing output dimension.

    Returns:
        Newly allocated array of ``a.dtype`` filled by ``dpnp_func``.

    Raises:
        ValueError: If ``a`` is empty.
    """
    if a.size == 0:
        raise ValueError("Passed Empty array")

    n = min(a.shape[0], a.shape[1])
    res_shape = np.zeros(a.ndim - 1, dtype=np.int64)

    # Leading output dimensions mirror the input dimensions after the first two.
    if a.ndim > 2:
        for i in range(a.ndim - 2):
            res_shape[i] = a.shape[i + 2]

    # NOTE(review): this sizing rule is reproduced as-is; it looks like it may
    # under-count for negative offsets on tall matrices -- confirm with caller.
    span = n + offset
    if span > a.shape[1]:
        res_shape[-1] = a.shape[1] - offset
    elif span > a.shape[0]:
        res_shape[-1] = a.shape[0]
    else:
        res_shape[-1] = span

    shape = tuplizer(res_shape)
    out = np.empty(shape, dtype=a.dtype)

    queue = dpctl_functions.get_current_queue()

    in_buf = dpctl_functions.malloc_shared(a.size * a.itemsize, queue)
    copy_in = dpctl_functions.queue_memcpy(
        queue, in_buf, a.ctypes, a.size * a.itemsize
    )
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    out_buf = dpctl_functions.malloc_shared(out.size * out.itemsize, queue)

    dpnp_func(
        in_buf,
        a.size * a.itemsize,
        out_buf,
        offset,
        a.shapeptr,
        out.shapeptr,
        out.ndim,
    )

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out.size * out.itemsize
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([a.size, out.size])

    if PRINT_DEBUG:
        print("dpnp implementation")

    return out
def common_shape_impl(a, out, dpnp_func, PRINT_DEBUG):
    """Run a shape-aware ``dpnp_func`` on a copy of ``a`` and fill ``out``.

    Args:
        a: Input array; must contain at least one element.
        out: Pre-allocated array that receives the result.
        dpnp_func: Callable invoked as
            ``dpnp_func(a_ptr, out_ptr, a.shapeptr, a.ndim)``.
        PRINT_DEBUG: When truthy, print ``"dpnp implementation"`` at the end.

    Raises:
        ValueError: If ``a`` is empty.
    """
    if a.size == 0:
        raise ValueError("Passed Empty array")

    queue = dpctl_functions.get_current_queue()

    in_nbytes = a.size * a.itemsize
    in_buf = dpctl_functions.malloc_shared(in_nbytes, queue)
    copy_in = dpctl_functions.queue_memcpy(queue, in_buf, a.ctypes, in_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    out_nbytes = out.size * out.itemsize
    out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

    dpnp_func(in_buf, out_buf, a.shapeptr, a.ndim)

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([a.size, out.size])

    if PRINT_DEBUG:
        print("dpnp implementation")
def dpnp_impl(a):
    """Return the boolean that ``dpnp_func`` computes over ``a``.

    ``dpnp_func`` and ``PRINT_DEBUG`` come from the enclosing scope (not
    visible in this block).

    Args:
        a: Input array.

    Returns:
        ``True`` immediately when ``a`` is empty; otherwise element 0 of a
        one-element ``np.bool_`` array filled by ``dpnp_func``.
    """
    if a.size == 0:
        return True

    out = np.empty(1, dtype=np.bool_)

    queue = dpctl_functions.get_current_queue()

    in_nbytes = a.size * a.itemsize
    in_buf = dpctl_functions.malloc_shared(in_nbytes, queue)
    copy_in = dpctl_functions.queue_memcpy(queue, in_buf, a.ctypes, in_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    out_nbytes = out.size * out.itemsize
    out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

    dpnp_func(in_buf, out_buf, a.size)

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([a.size, out.size])

    if PRINT_DEBUG:
        print("dpnp implementation")

    # TODO: sometimes all() returns ndarray
    return out[0]
def dpnp_impl(a):
    """Run ``dpnp_func`` on a 1-D or 2-D array, optionally as float64.

    ``copy_input_to_double``, ``res_dtype``, ``dpnp_func`` and
    ``PRINT_DEBUG`` come from the enclosing scope (not visible in this
    block).

    Args:
        a: Non-empty input array with one or two dimensions.

    Returns:
        For 2-D input, a ``(rows, rows)`` array of ``res_dtype``; for 1-D
        input, element 0 of a one-element ``res_dtype`` array.

    Raises:
        ValueError: If ``a`` is empty.
    """
    if a.size == 0:
        raise ValueError("Passed Empty array")

    queue = dpctl_functions.get_current_queue()

    # dpnp is handed a double-precision copy when copy_input_to_double is set.
    if copy_input_to_double:
        src = a.astype(np.float64)
    else:
        src = a

    in_buf = dpctl_functions.malloc_shared(src.size * src.itemsize, queue)
    copy_in = dpctl_functions.queue_memcpy(
        queue,
        in_buf,
        src.ctypes,
        src.size * src.itemsize,
    )
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    # Output is square in the number of rows for 2-D input, and a single
    # element for 1-D input (treated as one row).
    if a.ndim == 2:
        rows = a.shape[0]
        cols = a.shape[1]
        out = np.empty((rows, rows), dtype=res_dtype)
    elif a.ndim == 1:
        rows = 1
        cols = a.shape[0]
        out = np.empty(rows, dtype=res_dtype)

    out_buf = dpctl_functions.malloc_shared(out.size * out.itemsize, queue)

    dpnp_func(in_buf, out_buf, rows, cols)

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out.size * out.itemsize
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([src.size, a.size, out.size])

    if PRINT_DEBUG:
        print("dpnp implementation")

    if a.ndim == 2:
        return out
    elif a.ndim == 1:
        return out[0]
def dpnp_impl(a):
    """Call ``dpnp_eig`` on a square matrix and return its two outputs.

    ``dpnp_eig``, ``res_dtype`` and ``PRINT_DEBUG`` come from the enclosing
    scope (not visible in this block).

    Args:
        a: Array whose last two dimensions must be equal.

    Returns:
        Tuple ``(wr, vr)`` where ``wr`` has shape ``(n,)`` and ``vr`` has
        shape ``(n, n)``, both of ``res_dtype``.  For ``n == 0`` the empty
        arrays are returned without calling ``dpnp_eig``.

    Raises:
        ValueError: If the last two dimensions of ``a`` differ.
    """
    n = a.shape[-1]
    if a.shape[-2] != n:
        raise ValueError("Last 2 dimensions of the array must be square.")

    dpnp_ext._check_finite_matrix(a)

    wr = np.empty(n, dtype=res_dtype)
    vr = np.empty((n, n), dtype=res_dtype)

    if n == 0:
        return (wr, vr)

    queue = dpctl_functions.get_current_queue()

    in_nbytes = a.size * a.itemsize
    in_buf = dpctl_functions.malloc_shared(in_nbytes, queue)
    copy_in = dpctl_functions.queue_memcpy(queue, in_buf, a.ctypes, in_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    wr_nbytes = wr.size * wr.itemsize
    vr_nbytes = vr.size * vr.itemsize
    wr_buf = dpctl_functions.malloc_shared(wr_nbytes, queue)
    vr_buf = dpctl_functions.malloc_shared(vr_nbytes, queue)

    dpnp_eig(in_buf, wr_buf, vr_buf, n)

    # Copy both outputs back, one after the other.
    copy_wr = dpctl_functions.queue_memcpy(queue, wr.ctypes, wr_buf, wr_nbytes)
    dpctl_functions.event_wait(copy_wr)
    dpctl_functions.event_delete(copy_wr)

    copy_vr = dpctl_functions.queue_memcpy(queue, vr.ctypes, vr_buf, vr_nbytes)
    dpctl_functions.event_wait(copy_vr)
    dpctl_functions.event_delete(copy_vr)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(wr_buf, queue)
    dpctl_functions.free_with_queue(vr_buf, queue)

    dpnp_ext._dummy_liveness_func([wr.size, vr.size])

    if PRINT_DEBUG:
        print("dpnp implementation")

    return (wr, vr)
def dpnp_impl(a, kth):
    """Run ``dpnp_func`` on ``a`` with index ``kth`` and return a new array.

    ``dpnp_func`` and ``PRINT_DEBUG`` come from the enclosing scope (not
    visible in this block).

    Args:
        a: Non-empty input array.
        kth: Integer index; negative values are shifted by ``a.ndim``
            before being passed to ``dpnp_func``.

    Returns:
        Newly allocated array with the shape and dtype of ``a``, filled by
        ``dpnp_func``.

    Raises:
        ValueError: If ``a`` is empty.
    """
    if a.size == 0:
        raise ValueError("Passed Empty array")

    # NOTE(review): negative kth is normalised with a.ndim, not with an axis
    # length -- reproduced as-is; confirm this is what dpnp_func expects.
    if kth >= 0:
        kth_ = kth
    else:
        kth_ = a.ndim + kth

    scratch = numba_dppy.dpnp.copy(a)
    out = np.empty(a.shape, dtype=a.dtype)

    queue = dpctl_functions.get_current_queue()

    in_nbytes = a.size * a.itemsize
    in_buf = dpctl_functions.malloc_shared(in_nbytes, queue)
    copy_in = dpctl_functions.queue_memcpy(queue, in_buf, a.ctypes, in_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    scratch_nbytes = scratch.size * scratch.itemsize
    scratch_buf = dpctl_functions.malloc_shared(scratch_nbytes, queue)
    copy_scratch = dpctl_functions.queue_memcpy(
        queue, scratch_buf, scratch.ctypes, scratch_nbytes
    )
    dpctl_functions.event_wait(copy_scratch)
    dpctl_functions.event_delete(copy_scratch)

    out_nbytes = out.size * out.itemsize
    out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

    dpnp_func(in_buf, scratch_buf, out_buf, kth_, a.shapeptr, a.ndim)

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(scratch_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([a.size, scratch.size, out.size])

    if PRINT_DEBUG:
        print("dpnp implementation")

    return out
def dpnp_impl(a):
    """Run ``dpnp_func`` on square matrices and return one value per matrix.

    ``dpnp_func`` and ``PRINT_DEBUG`` come from the enclosing scope (not
    visible in this block).

    Args:
        a: Array whose last two dimensions must be equal.

    Returns:
        A scalar for 2-D input; otherwise an array of shape
        ``a.shape[:-2]`` with dtype ``a.dtype``.

    Raises:
        ValueError: If the last two dimensions of ``a`` differ.
    """
    order = a.shape[-1]
    if a.shape[-2] != order:
        raise ValueError("Input array must be square.")

    dpnp_ext._check_finite_matrix(a)

    if a.ndim == 2:
        out = np.empty((1, ), dtype=a.dtype)
        # Pre-filled placeholder; overwritten by the copy-back below.
        out[0] = -4
    else:
        out = np.empty(a.shape[:-2], dtype=a.dtype)

    queue = dpctl_functions.get_current_queue()

    in_nbytes = a.size * a.itemsize
    in_buf = dpctl_functions.malloc_shared(in_nbytes, queue)
    copy_in = dpctl_functions.queue_memcpy(queue, in_buf, a.ctypes, in_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    out_nbytes = out.size * out.itemsize
    out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

    dpnp_func(in_buf, out_buf, a.shapeptr, a.ndim)

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([out.size, a.size])

    if PRINT_DEBUG:
        print("dpnp implementation")

    if a.ndim == 2:
        return out[0]
    else:
        return out
def dpnp_impl(M, tol=None, hermitian=False):
    """Return the single value ``dpnp_func`` computes for ``M``.

    ``dpnp_func`` and ``PRINT_DEBUG`` come from the enclosing scope (not
    visible in this block).

    Args:
        M: Input array with at most two dimensions.
        tol: Must be ``None``; any other value is rejected.
        hermitian: Must be falsy; a truthy value is rejected.

    Returns:
        Element 0 of a one-element array of ``M.dtype`` filled by
        ``dpnp_func``.

    Raises:
        ValueError: If ``tol`` is given, ``hermitian`` is truthy, or ``M``
            has more than two dimensions.
    """
    # Reject every option this implementation does not handle up front.
    if tol is not None:
        raise ValueError("tol is not supported for np.linalg.matrix_rank(M)")
    if hermitian:
        raise ValueError(
            "hermitian is not supported for np.linalg.matrix_rank(M)"
        )
    if M.ndim > 2:
        raise ValueError(
            "np.linalg.matrix_rank(M) is only supported on 1 or 2-d arrays"
        )

    out = np.empty(1, dtype=M.dtype)

    queue = dpctl_functions.get_current_queue()

    in_nbytes = M.size * M.itemsize
    in_buf = dpctl_functions.malloc_shared(in_nbytes, queue)
    copy_in = dpctl_functions.queue_memcpy(queue, in_buf, M.ctypes, in_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    out_nbytes = out.size * out.itemsize
    out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

    dpnp_func(in_buf, out_buf, M.shapeptr, M.ndim)

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([out.size, M.size])

    if PRINT_DEBUG:
        print("dpnp implementation")

    return out[0]
def dpnp_impl(a, repeats):
    """Run ``dpnp_func`` on a 1-D array with a repeat count.

    ``dpnp_func`` and ``PRINT_DEBUG`` come from the enclosing scope (not
    visible in this block).

    Args:
        a: Non-empty array with fewer than two dimensions.
        repeats: Integer multiplier; the output holds ``a.size * repeats``
            elements.

    Returns:
        Newly allocated 1-D array of ``a.dtype`` filled by ``dpnp_func``.

    Raises:
        ValueError: If ``a`` is empty or has two or more dimensions.
    """
    if a.size == 0:
        raise ValueError("Passed Empty array")
    if a.ndim >= 2:
        raise ValueError("Not supported in dpnp")

    out = np.zeros(a.size * repeats, dtype=a.dtype)

    queue = dpctl_functions.get_current_queue()

    in_nbytes = a.size * a.itemsize
    in_buf = dpctl_functions.malloc_shared(in_nbytes, queue)
    copy_in = dpctl_functions.queue_memcpy(queue, in_buf, a.ctypes, in_nbytes)
    dpctl_functions.event_wait(copy_in)
    dpctl_functions.event_delete(copy_in)

    out_nbytes = out.size * out.itemsize
    out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

    dpnp_func(in_buf, out_buf, repeats, a.size)

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(in_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([a.size, out.size])

    if PRINT_DEBUG:
        print("dpnp implementation")

    return out
def common_impl_multivariate_normal(mean, cov, size, check_valid, tol, res, dpnp_func, print_debug):
    """Fill ``res`` by calling ``dpnp_func`` with copies of ``mean`` and ``cov``.

    Args:
        mean: Array copied into a shared-memory buffer before the call.
        cov: Array copied into a shared-memory buffer before the call.
        size: Not used by this function.
        check_valid: Not used by this function.
        tol: Not used by this function.
        res: Pre-allocated array that receives the result.
        dpnp_func: Callable invoked as ``dpnp_func(res_ptr, mean.size,
            mean_ptr, mean.size, cov_ptr, cov.size, res.size)``.
        print_debug: When truthy, print ``"dpnp implementation"`` at the end.
    """
    queue = dpctl_functions.get_current_queue()

    res_nbytes = res.size * res.itemsize
    res_buf = dpctl_functions.malloc_shared(res_nbytes, queue)

    # Stage `mean`.
    mean_nbytes = mean.size * mean.itemsize
    mean_buf = dpctl_functions.malloc_shared(mean_nbytes, queue)
    copy_mean = dpctl_functions.queue_memcpy(
        queue, mean_buf, mean.ctypes, mean_nbytes
    )
    dpctl_functions.event_wait(copy_mean)
    dpctl_functions.event_delete(copy_mean)

    # Stage `cov`.
    cov_nbytes = cov.size * cov.itemsize
    cov_buf = dpctl_functions.malloc_shared(cov_nbytes, queue)
    copy_cov = dpctl_functions.queue_memcpy(
        queue, cov_buf, cov.ctypes, cov_nbytes
    )
    dpctl_functions.event_wait(copy_cov)
    dpctl_functions.event_delete(copy_cov)

    dpnp_func(
        res_buf, mean.size, mean_buf, mean.size, cov_buf, cov.size, res.size
    )

    copy_out = dpctl_functions.queue_memcpy(
        queue, res.ctypes, res_buf, res_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(res_buf, queue)
    dpctl_functions.free_with_queue(mean_buf, queue)
    dpctl_functions.free_with_queue(cov_buf, queue)

    dpnp_ext._dummy_liveness_func([res.size])

    if print_debug:
        print("dpnp implementation")
def common_impl(low, high, res, dpnp_func, print_debug):
    """Fill ``res`` by calling ``dpnp_func`` with the bounds ``low``/``high``.

    Args:
        low: First bound; validated together with ``high`` by
            ``check_range`` and forwarded to ``dpnp_func``.
        high: Second bound; see ``low``.
        res: Pre-allocated array that receives the result.
        dpnp_func: Callable invoked as
            ``dpnp_func(res_ptr, low, high, res.size)``.
        print_debug: When truthy, print ``"dpnp implementation"`` at the end.
    """
    check_range(low, high)

    queue = dpctl_functions.get_current_queue()

    res_nbytes = res.size * res.itemsize
    res_buf = dpctl_functions.malloc_shared(res_nbytes, queue)

    dpnp_func(res_buf, low, high, res.size)

    copy_out = dpctl_functions.queue_memcpy(
        queue, res.ctypes, res_buf, res_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(res_buf, queue)

    dpnp_ext._dummy_liveness_func([res.size])

    if print_debug:
        print("dpnp implementation")
def common_impl_hypergeometric(ngood, nbad, nsample, res, dpnp_func, print_debug):
    """Fill ``res`` by calling ``dpnp_func`` with three scalar parameters.

    Args:
        ngood: Forwarded unchanged to ``dpnp_func``.
        nbad: Forwarded unchanged to ``dpnp_func``.
        nsample: Forwarded unchanged to ``dpnp_func``.
        res: Pre-allocated array that receives the result.
        dpnp_func: Callable invoked as
            ``dpnp_func(res_ptr, ngood, nbad, nsample, res.size)``.
        print_debug: When truthy, print ``"dpnp implementation"`` at the end.
    """
    queue = dpctl_functions.get_current_queue()

    res_nbytes = res.size * res.itemsize
    res_buf = dpctl_functions.malloc_shared(res_nbytes, queue)

    dpnp_func(res_buf, ngood, nbad, nsample, res.size)

    copy_out = dpctl_functions.queue_memcpy(
        queue, res.ctypes, res_buf, res_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(res_buf, queue)

    dpnp_ext._dummy_liveness_func([res.size])

    if print_debug:
        print("dpnp implementation")
def common_impl_1_arg(arg1, res, dpnp_func, print_debug):
    """Fill ``res`` by calling ``dpnp_func`` with a single scalar argument.

    Args:
        arg1: Forwarded unchanged to ``dpnp_func``.
        res: Pre-allocated array that receives the result.
        dpnp_func: Callable invoked as ``dpnp_func(res_ptr, arg1, res.size)``.
        print_debug: When truthy, print ``"dpnp implementation"`` at the end.

    Raises:
        ValueError: With message ``"Device not supported"`` if ``dpnp_func``
            raises any exception.
    """
    sycl_queue = dpctl_functions.get_current_queue()

    res_nbytes = res.size * res.itemsize
    res_usm = dpctl_functions.malloc_shared(res_nbytes, sycl_queue)

    try:
        dpnp_func(res_usm, arg1, res.size)
    except Exception:
        # Release the result buffer before reporting the failure; otherwise
        # the allocation is leaked on every failed call.
        dpctl_functions.free_with_queue(res_usm, sycl_queue)
        raise ValueError("Device not supported")

    event = dpctl_functions.queue_memcpy(
        sycl_queue, res.ctypes, res_usm, res_nbytes
    )
    dpctl_functions.event_wait(event)
    dpctl_functions.event_delete(event)

    dpctl_functions.free_with_queue(res_usm, sycl_queue)

    # NOTE(review): presumably keeps `res` alive until the copy above has
    # completed -- confirm against dpnp_ext.
    dpnp_ext._dummy_liveness_func([res.size])

    if print_debug:
        print("dpnp implementation")
def common_dot_impl(dpnp_func, a, b, out, m, print_debug):
    """Run a two-operand ``dpnp_func`` on copies of ``a`` and ``b``, filling ``out``.

    For the result and for each operand, ``dpnp_func`` receives five values
    in this order: buffer pointer, element count, number of dimensions,
    shape pointer, and a strides pointer.  The strides pointer always comes
    from the same zero-dimensional ``np.array(1)``.

    Args:
        dpnp_func: Callable receiving the fifteen arguments described above
            (result first, then ``a``, then ``b``).
        a: First operand array.
        b: Second operand array.
        out: Pre-allocated array that receives the result.
        m: Not used by this function.
        print_debug: When truthy, print ``"dpnp implementation"`` at the end.
    """
    queue = dpctl_functions.get_current_queue()

    a_nbytes = a.size * a.itemsize
    a_buf = dpctl_functions.malloc_shared(a_nbytes, queue)
    copy_a = dpctl_functions.queue_memcpy(queue, a_buf, a.ctypes, a_nbytes)
    dpctl_functions.event_wait(copy_a)
    dpctl_functions.event_delete(copy_a)

    b_nbytes = b.size * b.itemsize
    b_buf = dpctl_functions.malloc_shared(b_nbytes, queue)
    copy_b = dpctl_functions.queue_memcpy(queue, b_buf, b.ctypes, b_nbytes)
    dpctl_functions.event_wait(copy_b)
    dpctl_functions.event_delete(copy_b)

    out_nbytes = out.size * out.itemsize
    out_buf = dpctl_functions.malloc_shared(out_nbytes, queue)

    strides = np.array(1)

    dpnp_func(
        # result
        out_buf,
        out.size,
        out.ndim,
        out.shapeptr,
        strides.ctypes,
        # first operand
        a_buf,
        a.size,
        a.ndim,
        a.shapeptr,
        strides.ctypes,
        # second operand
        b_buf,
        b.size,
        b.ndim,
        b.shapeptr,
        strides.ctypes,
    )

    copy_out = dpctl_functions.queue_memcpy(
        queue, out.ctypes, out_buf, out_nbytes
    )
    dpctl_functions.event_wait(copy_out)
    dpctl_functions.event_delete(copy_out)

    dpctl_functions.free_with_queue(a_buf, queue)
    dpctl_functions.free_with_queue(b_buf, queue)
    dpctl_functions.free_with_queue(out_buf, queue)

    dpnp_ext._dummy_liveness_func([a.size, b.size, out.size])

    if print_debug:
        print("dpnp implementation")