def __mul__(self, other):
    """Multiply this array by another GPU array or by a non-GPUArray operand.

    When ``other`` is a ``gpuarray.GPUArray`` a new output of ``type(self)``
    is created with the broadcast shape and the common dtype of both
    operands and handed to ``self._elwise_multiply``. Every other operand
    is delegated to the parent class implementation.
    """
    if not isinstance(other, gpuarray.GPUArray):
        return super().__mul__(other)

    out_shape = np.broadcast_shapes(self.shape, other.shape)
    out_dtype = gpuarray._get_common_dtype(self, other)
    out = type(self)(out_shape, out_dtype)
    return self._elwise_multiply(other, out)
def weighted_inner(self, b, w):
    """Return the weighted inner product of ``self.data`` with ``b``.

    If ``w`` is ``None`` the call is forwarded to ``self.inner(b)``.
    Otherwise a kernel is looked up for the dtypes of the two operands, the
    weight and their common output dtype; its result is fetched with
    ``.get()`` and reduced with ``.max()``.
    """
    if w is not None:
        lhs = self.data
        out_dtype = _get_common_dtype(lhs, b)
        kernel = get_weighted_inner_kernel(
            lhs.dtype, b.dtype, w.dtype, out_dtype
        )
        fetched = kernel(lhs, b, w).get()
        return fetched.max()

    # No weights supplied: fall back to the unweighted inner product.
    return self.inner(b)
def weighted_inner(self, b, w):
    """Return the weighted inner product of ``self.data`` with ``b``.

    Args:
        b: Second operand; must expose ``dtype`` when ``w`` is given.
        w: Weight array, or ``None`` to compute the unweighted inner
            product via ``self.inner``.

    Returns:
        ``self.inner(b)`` when ``w`` is ``None``; otherwise the kernel
        result fetched with ``.get()`` and reduced with ``.max()``.
    """
    if w is None:
        # The operand is named ``b`` here; the previous code referenced an
        # undefined name ``other`` and raised NameError on this path.
        return self.inner(b)

    a = self.data
    dtype_out = _get_common_dtype(a, b)
    krnl = get_weighted_inner_kernel(a.dtype, b.dtype, w.dtype, dtype_out)
    return krnl(a, b, w).get().max()
def guarded_div(self, a, b):
    """Launch the guarded-division kernel on ``a`` and ``b``.

    A new array like ``a`` is created with the common dtype of both inputs,
    the shapes are asserted to be equal, and the kernel obtained from
    ``self.guarded_div_kernel`` is launched using ``a``'s grid and block
    configuration. The newly created array is returned.
    """
    from pycuda.gpuarray import _get_common_dtype

    out = a._new_like_me(_get_common_dtype(a, b))
    assert a.shape == b.shape

    kernel = self.guarded_div_kernel(a.dtype, b.dtype, out.dtype)
    kernel.prepared_async_call(
        a._grid,
        a._block,
        None,  # passed in the slot after grid/block; presumably the stream
        a.gpudata,
        b.gpudata,
        out.gpudata,
        a.mem_size,
    )
    return out
def __add__(self, other, sub=False, rsub=False):
    """Add (or subtract) a GPU array or a non-GPUArray operand.

    With a ``gpuarray.GPUArray`` operand the result is computed by
    ``self._axpbyz`` into a new output of ``type(self)`` with the broadcast
    shape and common dtype. ``rsub`` flips the sign of the coefficient
    applied to ``self``; ``sub`` flips the sign of the coefficient applied
    to ``other``. Any other operand is delegated to the parent class's
    ``__sub__``, ``__rsub__`` or ``__add__`` depending on the flags
    (``sub`` takes precedence over ``rsub``).
    """
    if not isinstance(other, gpuarray.GPUArray):
        if sub:
            return super().__sub__(other)
        if rsub:
            return super().__rsub__(other)
        return super().__add__(other)

    # Array operand: allocate the output, then pick the signs.
    out_shape = np.broadcast_shapes(self.shape, other.shape)
    out_dtype = gpuarray._get_common_dtype(self, other)
    out = type(self)(out_shape, out_dtype)
    self_coeff = -1 if rsub else 1
    other_coeff = -1 if sub else 1
    return self._axpbyz(self_coeff, other, other_coeff, out)
def weighted_inner(a, b, w):
    """Run the weighted inner-product kernel on ``a``, ``b`` and ``w``.

    The kernel is selected from the dtypes of the three inputs plus the
    common dtype of ``a`` and ``b``; whatever the kernel call returns is
    passed back unchanged.
    """
    out_dtype = _get_common_dtype(a, b)
    kernel = get_weighted_inner_kernel(
        a.dtype,
        b.dtype,
        w.dtype,
        out_dtype,
    )
    result = kernel(a, b, w)
    return result
def inner(a, b):
    """Run the inner-product kernel on ``a`` and ``b``.

    The kernel is selected from both input dtypes and their common dtype;
    the value returned by the kernel call is passed back unchanged.
    """
    out_dtype = _get_common_dtype(a, b)
    kernel = get_inner_kernel(a.dtype, b.dtype, out_dtype)
    result = kernel(a, b)
    return result
def __init__(self, x, y, z):
    """Store the ``.data`` of ``x``, ``y`` and ``z`` and pick a kernel.

    The correlate kernel is looked up from the dtypes of ``x`` and ``y``
    and their common dtype, and kept on the instance as ``self.krnl``.
    """
    # Assign self.x, self.y, self.z in that order from each argument's data.
    for attr_name, wrapped in (("x", x), ("y", y), ("z", z)):
        setattr(self, attr_name, wrapped.data)

    out_dtype = _get_common_dtype(x, y)
    self.krnl = get_correlate_kernel(x.dtype, y.dtype, out_dtype)
def correlate(a, b, out, stream=None):
    """Invoke the correlate kernel on the data of ``a``, ``b`` and ``out``.

    The kernel is chosen from the dtypes of ``a`` and ``b`` and their
    common dtype. Nothing is returned. ``stream`` is accepted but not used
    by this function.
    """
    out_dtype = _get_common_dtype(a, b)
    kernel = get_correlate_kernel(a.dtype, b.dtype, out_dtype)
    kernel(a.data, b.data, out.data)
def inner(self, b):
    """Return the inner product of ``self.data`` with ``b``.

    A kernel is looked up from both operand dtypes and their common dtype;
    its result is fetched with ``.get()`` and reduced with ``.max()``.
    """
    lhs = self.data
    out_dtype = _get_common_dtype(lhs, b)
    kernel = get_inner_kernel(lhs.dtype, b.dtype, out_dtype)
    fetched = kernel(lhs, b).get()
    return fetched.max()
def correlate(a, b, out, stream=None):
    """Correlate ``a`` with ``b``, passing ``out.data`` to the kernel.

    Looks up the correlate kernel for the two input dtypes and their common
    dtype, then calls it with the ``.data`` of each argument. Returns
    nothing; the ``stream`` argument is not used here.
    """
    common_dtype = _get_common_dtype(a, b)
    correlate_kernel = get_correlate_kernel(a.dtype, b.dtype, common_dtype)
    correlate_kernel(a.data, b.data, out.data)