def __abs__(self):
    """Return a `GPUArray` of the absolute values of the elements
    of `self`.

    For complex element types the output dtype is whatever
    ``match_precision(float64, self.dtype)`` yields; for every other
    element type the output has the dtype of `self`.
    """
    # Name of the unary function handed to the kernel generator.
    if self.dtype == np.float32:
        fname = "fabsf"
    elif self.dtype == np.float64:
        fname = "fabs"
    else:
        fname = "abs"

    # Allocate the result exactly once, in the branch that knows its dtype
    # (previously a buffer was allocated up front and then thrown away
    # again for complex input).
    if issubclass(self.dtype.type, np.complexfloating):
        from pytools import match_precision
        out_dtype = match_precision(np.dtype(np.float64), self.dtype)
        result = self._new_like_me(out_dtype)
    else:
        out_dtype = self.dtype
        result = self._new_like_me()

    func = elementwise.get_unary_func_kernel(
        fname, self.dtype, out_dtype=out_dtype)
    func.prepared_async_call(
        self._grid, self._block, None,
        self.gpudata, result.gpudata, self.mem_size)

    return result
def __init__(self, order, startup_stepper=None, dtype=numpy.float64,
        rcon=None):
    """Initialize history, coefficients, startup stepper and instrumentation.

    :arg order: forwarded to :func:`make_ab_coefficients`.
    :arg startup_stepper: stored as :attr:`startup_stepper`. If *None*, an
        ``LSRK4TimeStepper`` constructed with :attr:`dtype` is used instead.
    :arg dtype: converted with :func:`numpy.dtype` and stored as
        :attr:`dtype`.
    :arg rcon: if not *None*, its ``make_timer`` is used to create
        :attr:`timer`; otherwise ``IntervalTimer`` is used.
    """
    from pytools import match_precision
    from pytools.log import IntervalTimer, EventCounter

    self.f_history = []

    self.dtype = numpy.dtype(dtype)
    double = numpy.dtype(numpy.float64)
    self.scalar_dtype = match_precision(double, self.dtype)
    self.coefficients = numpy.asarray(
        make_ab_coefficients(order), dtype=self.scalar_dtype)

    if startup_stepper is None:
        # Imported lazily, only when the default stepper is needed.
        from hedge.timestep.runge_kutta import LSRK4TimeStepper
        startup_stepper = LSRK4TimeStepper(self.dtype)
    self.startup_stepper = startup_stepper

    make_timer = IntervalTimer if rcon is None else rcon.make_timer
    self.timer = make_timer(
        "t_ab", "Time spent doing algebra in Adams-Bashforth")
    self.flop_counter = EventCounter(
        "n_flops_ab", "Floating point operations performed in AB")
def __init__(self, dtype=numpy.float64, rcon=None,
        vector_primitive_factory=None):
    """Initialize vector primitives, instrumentation and coefficients.

    :arg dtype: converted with :func:`numpy.dtype` and stored as
        :attr:`dtype`.
    :arg rcon: if not *None*, its ``make_timer`` is used to create
        :attr:`timer`; otherwise ``IntervalTimer`` is used.
    :arg vector_primitive_factory: stored as
        :attr:`vector_primitive_factory`. If *None*, a default
        ``VectorPrimitiveFactory()`` is created.
    """
    if vector_primitive_factory is None:
        from hedge.vector_primitives import VectorPrimitiveFactory
        vector_primitive_factory = VectorPrimitiveFactory()
    self.vector_primitive_factory = vector_primitive_factory

    from pytools.log import IntervalTimer, EventCounter
    make_timer = rcon.make_timer if rcon is not None else IntervalTimer

    self.timer = make_timer("t_rk4", "Time spent doing algebra in RK4")
    self.flop_counter = EventCounter(
        "n_flops_rk4", "Floating point operations performed in RK4")

    from pytools import match_precision
    self.dtype = numpy.dtype(dtype)
    self.scalar_dtype = match_precision(
        numpy.dtype(numpy.float64), self.dtype)

    # The three class-level coefficient sequences are stacked and then
    # transposed before being stored.
    stacked = [self._RK4A, self._RK4B, self._RK4C]
    self.coeffs = numpy.array(stacked, dtype=self.scalar_dtype).T
def squared_norm(self):
    """Apply the kernel from `get_norm_kernel` to ``self.data``.

    The output array is allocated like ``self.data`` but with the dtype
    given by ``match_precision(float64, self.data.dtype)``, and is returned
    after the kernel has been invoked on it.
    """
    src = self.data
    result_dtype = match_precision(np.dtype('float64'), src.dtype)
    result = src._new_like_me(dtype=result_dtype)

    get_norm_kernel(src.dtype, result_dtype)(src, result)
    return result
def squared_norm(self):
    """Run the norm kernel on ``self.data`` and return its output array.

    The output is created via ``self.data._new_like_me`` with the dtype
    produced by ``match_precision(float64, self.data.dtype)``.
    """
    data = self.data
    in_dtype = data.dtype
    out_dtype = match_precision(np.dtype("float64"), in_dtype)

    norm_sq = data._new_like_me(dtype=out_dtype)
    kernel = get_norm_kernel(in_dtype, out_dtype)
    kernel(data, norm_sq)

    return norm_sq
def __init__(self, order, startup_stepper=None, dtype=numpy.float64,
        rcon=None):
    """Set up the stepper state.

    :arg order: handed to :func:`make_ab_coefficients` to obtain
        :attr:`coefficients`.
    :arg startup_stepper: stepper kept as :attr:`startup_stepper`; when
        *None*, ``LSRK4TimeStepper(self.dtype)`` is created.
    :arg dtype: anything accepted by :func:`numpy.dtype`.
    :arg rcon: optional object providing ``make_timer``; when *None*,
        ``IntervalTimer`` creates the timer.
    """
    self.f_history = []

    from pytools import match_precision

    self.dtype = numpy.dtype(dtype)
    self.scalar_dtype = match_precision(
        numpy.dtype(numpy.float64), self.dtype)

    ab_coefficients = make_ab_coefficients(order)
    self.coefficients = numpy.asarray(
        ab_coefficients, dtype=self.scalar_dtype)

    if startup_stepper is None:
        from hedge.timestep.runge_kutta import LSRK4TimeStepper
        self.startup_stepper = LSRK4TimeStepper(self.dtype)
    else:
        self.startup_stepper = startup_stepper

    from pytools.log import IntervalTimer, EventCounter

    if rcon is None:
        timer_factory = IntervalTimer
    else:
        timer_factory = rcon.make_timer

    self.timer = timer_factory(
        "t_ab", "Time spent doing algebra in Adams-Bashforth")
    self.flop_counter = EventCounter(
        "n_flops_ab", "Floating point operations performed in AB")
def simple_result_dtype_getter(vector_dtype_map, scalar_dtype_map,
        const_dtypes):
    """Determine a result dtype from vector, scalar and constant dtypes.

    The common dtype of the vector dtypes is computed first. If any scalar
    or constant dtypes are present, their common dtype is precision-matched
    against that vector result and then combined with it.

    :arg vector_dtype_map: mapping whose values are the vector dtypes.
    :arg scalar_dtype_map: mapping whose values are the scalar dtypes.
    :arg const_dtypes: iterable of dtypes of constants.
    :returns: the resulting dtype.
    """
    from pytools import common_dtype, match_precision

    result = common_dtype(vector_dtype_map.values())

    # On Python 3 ``dict.values()`` is a view and cannot be concatenated
    # with a list using ``+``; materialize both operands first.
    scalar_dtypes = list(scalar_dtype_map.values()) + list(const_dtypes)
    if scalar_dtypes:
        prec_matched_scalar_dtype = match_precision(
            common_dtype(scalar_dtypes),
            dtype_to_match=result)
        result = common_dtype([result, prec_matched_scalar_dtype])

    return result
def real(self):
    """Return the real part of `self`.

    If the element type is not complex, `self` itself is returned.
    Otherwise a new array is allocated with the dtype given by
    ``match_precision(float64, self.dtype)`` and filled by the kernel from
    ``elementwise.get_real_kernel``.
    """
    src_dtype = self.dtype

    if not issubclass(src_dtype.type, np.complexfloating):
        # Nothing to extract: hand back the very same object.
        return self

    from pytools import match_precision
    real_dtype = match_precision(np.dtype(np.float64), src_dtype)

    result = self._new_like_me(dtype=real_dtype)
    kernel = elementwise.get_real_kernel(src_dtype, real_dtype)
    kernel.prepared_async_call(
        self._grid, self._block, None,
        self.gpudata, result.gpudata, self.mem_size)

    return result
def bind(self, discr):
    """Compile the operator template on *discr* and return a callable.

    ``self.kappa``, ``self.eps0`` and ``self.s_0`` are converted once to the
    scalar type obtained from
    ``match_precision(float64, discr.default_scalar_type)``.

    :returns: a function of ``u`` that invokes the compiled template with
        ``u`` and the converted parameters as keyword arguments.
    """
    compiled = discr.compile(self.op_template())

    from pytools import match_precision
    matched_dtype = match_precision(
        numpy.dtype(numpy.float64),
        discr.default_scalar_type)
    to_scalar = matched_dtype.type

    # Convert in the fixed order kappa, eps0, s_0.
    params = {
        name: to_scalar(getattr(self, name))
        for name in ("kappa", "eps0", "s_0")
    }

    def apply(u):
        return compiled(u=u, **params)

    return apply
def __init__(self, use_high_order=True, dtype=numpy.float64, rcon=None,
        vector_primitive_factory=None, atol=0, rtol=0,
        max_dt_growth=5, min_dt_shrinkage=0.1,
        limiter=None):
    """Store the stepper configuration and create instrumentation.

    :arg use_high_order: stored as :attr:`use_high_order`.
    :arg dtype: converted with :func:`numpy.dtype`.
    :arg rcon: if not *None*, its ``make_timer`` creates :attr:`timer`;
        otherwise ``IntervalTimer`` is used.
    :arg vector_primitive_factory: if *None*, a default
        ``VectorPrimitiveFactory()`` is created.
    :arg atol: stored as :attr:`atol`.
    :arg rtol: stored as :attr:`rtol`. :attr:`adaptive` is set to
        ``bool(atol or rtol)``.
    :arg max_dt_growth: stored as :attr:`max_dt_growth`.
    :arg min_dt_shrinkage: stored as :attr:`min_dt_shrinkage`.
    :arg limiter: callable stored as :attr:`limiter`; if *None*, the
        identity function is used.
    """
    if vector_primitive_factory is None:
        from hedge.vector_primitives import VectorPrimitiveFactory
        vector_primitive_factory = VectorPrimitiveFactory()
    self.vector_primitive_factory = vector_primitive_factory

    from pytools.log import IntervalTimer, EventCounter
    make_timer = IntervalTimer if rcon is None else rcon.make_timer

    # Without a user-supplied limiter, fall back to the identity map.
    self.limiter = (lambda x: x) if limiter is None else limiter

    self.timer = make_timer(
        "t_rk", "Time spent doing algebra in Runge-Kutta")
    self.flop_counter = EventCounter(
        "n_flops_rk",
        "Floating point operations performed in Runge-Kutta")

    self.use_high_order = use_high_order
    self.dtype = numpy.dtype(dtype)

    self.adaptive = bool(atol or rtol)
    self.atol = atol
    self.rtol = rtol

    from pytools import match_precision
    double = numpy.dtype(numpy.float64)
    self.scalar_dtype = match_precision(double, self.dtype)

    self.max_dt_growth = max_dt_growth
    self.min_dt_shrinkage = min_dt_shrinkage

    self.linear_combiner_cache = {}
def imag(self):
    """Return the imaginary part of `self`.

    For a non-complex element type, the result of ``zeros_like(self)`` is
    returned. Otherwise a new array is allocated with the dtype given by
    ``match_precision(float64, self.dtype)`` and filled by the kernel from
    ``elementwise.get_imag_kernel``.
    """
    src_dtype = self.dtype

    if not issubclass(self.dtype.type, numpy.complexfloating):
        return zeros_like(self)

    from pytools import match_precision
    real_dtype = match_precision(numpy.dtype(numpy.float64), src_dtype)

    result = self._new_like_me(dtype=real_dtype)

    kernel = elementwise.get_imag_kernel(src_dtype, real_dtype)
    # The block shape is configured on the kernel before launching it.
    kernel.set_block_shape(*self._block)
    kernel.prepared_async_call(
        self._grid, None,
        self.gpudata, result.gpudata, self.mem_size)

    return result
def imag(self):
    """Return the imaginary part of `self`.

    For a non-complex element type, the result of ``zeros_like(self)`` is
    returned. For a complex element type, a new array is allocated with the
    dtype given by ``match_precision(float64, self.dtype)`` and filled by
    the kernel from ``elementwise.get_imag_kernel``.

    Raises `RuntimeError` if `self` is complex and ``self.flags.forc`` is
    false.
    """
    src_dtype = self.dtype

    if not issubclass(self.dtype.type, np.complexfloating):
        return zeros_like(self)

    if not self.flags.forc:
        raise RuntimeError("only contiguous arrays may "
                "be used as arguments to this operation")

    from pytools import match_precision
    real_dtype = match_precision(np.dtype(np.float64), src_dtype)

    result = self._new_like_me(dtype=real_dtype)
    kernel = elementwise.get_imag_kernel(src_dtype, real_dtype)
    kernel.prepared_async_call(
        self._grid, self._block, None,
        self.gpudata, result.gpudata, self.mem_size)

    return result
def test_complex_bits(self):
    """Compare ``real``, ``imag`` and ``conj`` of random complex device
    arrays with the numpy results on the fetched host copy, including
    ``conj(out=...)``, and check that the contiguity flag matching the
    requested order is set on the array and its derived arrays.
    """
    from pycuda.curandom import rand as curand
    from pytools import match_precision

    dtypes = [np.complex64]
    if has_double_support():
        dtypes.append(np.complex128)

    n = 20
    shape = (n,)

    # Evaluated lazily, one at a time, in this order.
    views = (
        lambda arr: arr,
        lambda arr: arr.real,
        lambda arr: arr.imag,
        lambda arr: arr.conj(),
    )

    for tp in dtypes:
        dtype = np.dtype(tp)
        real_dtype = match_precision(np.dtype(np.float64), dtype)

        re_part = curand(shape, real_dtype).astype(dtype)
        im_part = curand(shape, real_dtype).astype(dtype)
        z = re_part + 1j * im_part

        for part in ("real", "imag"):
            on_host = getattr(z.get(), part)
            on_device = getattr(z, part).get()
            assert la.norm(on_host - on_device) == 0
        assert la.norm(z.get().conj() - z.conj().get()) == 0

        # verify conj with out parameter
        z_out = z.astype(np.complex64)
        assert z_out is z.conj(out=z_out)
        assert la.norm(z.get().conj() - z_out.get()) < 1e-7

        # verify contiguity is preserved
        for order, flag_name in (("C", "c_contiguous"),
                                 ("F", "f_contiguous")):
            # test both zero and non-zero value code paths
            z_real = gpuarray.zeros(z.shape, dtype=real_dtype, order=order)
            z2 = z.reshape(z.shape, order=order)
            for zdata in (z_real, z2):
                for view in views:
                    assert getattr(view(zdata).flags, flag_name)
def test_complex_bits(self):
    """Compare ``real``, ``imag`` and ``conj`` of random complex device
    arrays with the numpy results computed on the fetched host copy.
    """
    from pycuda.curandom import rand as curand
    from pytools import match_precision

    dtypes = [np.complex64]
    if has_double_support():
        dtypes.append(np.complex128)

    n = 20
    shape = (n, )

    for tp in dtypes:
        dtype = np.dtype(tp)
        real_dtype = match_precision(np.dtype(np.float64), dtype)

        re_part = curand(shape, real_dtype).astype(dtype)
        im_part = curand(shape, real_dtype).astype(dtype)
        z = re_part + 1j * im_part

        real_diff = z.get().real - z.real.get()
        assert la.norm(real_diff) == 0

        imag_diff = z.get().imag - z.imag.get()
        assert la.norm(imag_diff) == 0

        conj_diff = z.get().conj() - z.conj().get()
        assert la.norm(conj_diff) == 0
def test_complex_bits(self):
    """Check that the device-side ``real``, ``imag`` and ``conj`` of random
    complex arrays agree exactly with numpy applied to the host copy.
    """
    from pycuda.curandom import rand as curand
    from pytools import match_precision

    if has_double_support():
        complex_types = (np.complex64, np.complex128)
    else:
        complex_types = (np.complex64,)

    n = 20

    for tp in complex_types:
        dtype = np.dtype(tp)
        real_dtype = match_precision(np.dtype(np.float64), dtype)

        first = curand((n,), real_dtype).astype(dtype)
        second = curand((n,), real_dtype).astype(dtype)
        z = first + 1j*second

        for part in ("real", "imag"):
            expected = getattr(z.get(), part)
            actual = getattr(z, part).get()
            assert la.norm(expected - actual) == 0

        assert la.norm(z.get().conj() - z.conj().get()) == 0
def __init__(self, use_high_order=True, dtype=numpy.float64, rcon=None,
        vector_primitive_factory=None, atol=0, rtol=0,
        max_dt_growth=5, min_dt_shrinkage=0.1,
        limiter=None):
    """Record the configuration of the stepper.

    :arg use_high_order: kept as :attr:`use_high_order`.
    :arg dtype: anything accepted by :func:`numpy.dtype`.
    :arg rcon: optional object providing ``make_timer``; when *None*,
        ``IntervalTimer`` creates the timer.
    :arg vector_primitive_factory: when *None*, replaced by a fresh
        ``VectorPrimitiveFactory()``.
    :arg atol: kept as :attr:`atol`.
    :arg rtol: kept as :attr:`rtol`; :attr:`adaptive` is
        ``bool(atol or rtol)``.
    :arg max_dt_growth: kept as :attr:`max_dt_growth`.
    :arg min_dt_shrinkage: kept as :attr:`min_dt_shrinkage`.
    :arg limiter: callable kept as :attr:`limiter`; when *None*, a function
        returning its argument unchanged is used.
    """
    if vector_primitive_factory is not None:
        self.vector_primitive_factory = vector_primitive_factory
    else:
        from hedge.vector_primitives import VectorPrimitiveFactory
        self.vector_primitive_factory = VectorPrimitiveFactory()

    from pytools.log import IntervalTimer, EventCounter

    if rcon is None:
        timer_factory = IntervalTimer
    else:
        timer_factory = rcon.make_timer

    if limiter is not None:
        self.limiter = limiter
    else:
        self.limiter = lambda x: x

    self.timer = timer_factory(
        "t_rk", "Time spent doing algebra in Runge-Kutta")
    self.flop_counter = EventCounter(
        "n_flops_rk",
        "Floating point operations performed in Runge-Kutta")

    self.use_high_order = use_high_order

    self.dtype = numpy.dtype(dtype)

    self.adaptive = bool(atol or rtol)
    self.atol, self.rtol = atol, rtol

    from pytools import match_precision
    self.scalar_dtype = match_precision(
        numpy.dtype(numpy.float64), self.dtype)

    self.max_dt_growth = max_dt_growth
    self.min_dt_shrinkage = min_dt_shrinkage

    self.linear_combiner_cache = {}
def test_complex_bits(self):
    """Compare ``real``, ``imag`` and ``conj`` of random complex device
    arrays with numpy on the fetched host copy, then check that the
    contiguity flag matching the requested order is set on the array and on
    its ``real``, ``imag`` and ``conj()`` results.
    """
    from pycuda.curandom import rand as curand
    # Imported once here rather than on every loop iteration.
    from pytools import match_precision

    if has_double_support():
        dtypes = [np.complex64, np.complex128]
    else:
        dtypes = [np.complex64]

    n = 20
    for tp in dtypes:
        dtype = np.dtype(tp)
        real_dtype = match_precision(np.dtype(np.float64), dtype)

        z = (curand((n,), real_dtype).astype(dtype)
                + 1j*curand((n,), real_dtype).astype(dtype))

        assert la.norm(z.get().real - z.real.get()) == 0
        assert la.norm(z.get().imag - z.imag.get()) == 0
        assert la.norm(z.get().conj() - z.conj().get()) == 0

        # verify contiguity is preserved
        for order in ["C", "F"]:
            # test both zero and non-zero value code paths
            z_real = gpuarray.zeros(z.shape, dtype=real_dtype,
                                    order=order)
            z2 = z.reshape(z.shape, order=order)
            for zdata in [z_real, z2]:
                # Flags are asserted by truthiness instead of being
                # compared to True with ``==``.
                if order == "C":
                    assert zdata.flags.c_contiguous
                    assert zdata.real.flags.c_contiguous
                    assert zdata.imag.flags.c_contiguous
                    assert zdata.conj().flags.c_contiguous
                elif order == "F":
                    assert zdata.flags.f_contiguous
                    assert zdata.real.flags.f_contiguous
                    assert zdata.imag.flags.f_contiguous
                    assert zdata.conj().flags.f_contiguous
def __init__(self, dtype=numpy.float64, rcon=None,
        vector_primitive_factory=None):
    """Set up the stepper.

    :arg dtype: anything accepted by :func:`numpy.dtype`; kept as
        :attr:`dtype`.
    :arg rcon: optional object providing ``make_timer``; when *None*,
        ``IntervalTimer`` creates the timer.
    :arg vector_primitive_factory: when *None*, replaced by a fresh
        ``VectorPrimitiveFactory()``.
    """
    if vector_primitive_factory is not None:
        self.vector_primitive_factory = vector_primitive_factory
    else:
        from hedge.vector_primitives import VectorPrimitiveFactory
        self.vector_primitive_factory = VectorPrimitiveFactory()

    from pytools.log import IntervalTimer, EventCounter

    if rcon is None:
        timer_factory = IntervalTimer
    else:
        timer_factory = rcon.make_timer

    self.timer = timer_factory(
        "t_rk4", "Time spent doing algebra in RK4")
    self.flop_counter = EventCounter(
        "n_flops_rk4",
        "Floating point operations performed in RK4")

    from pytools import match_precision

    self.dtype = numpy.dtype(dtype)
    double = numpy.dtype(numpy.float64)
    self.scalar_dtype = match_precision(double, self.dtype)

    # Stack the three class-level coefficient sequences, then transpose.
    self.coeffs = numpy.array(
        [self._RK4A, self._RK4B, self._RK4C],
        dtype=self.scalar_dtype,
    ).T
def squared_norm(self):
    """Apply the kernel from `get_norm_kernel` to ``self.data``.

    The output array is created via ``self.data._new_like_me`` with the
    dtype given by ``match_precision(float64, self.dtype)`` and is returned
    after the kernel has been invoked on it.
    """
    real_dtype = match_precision(np.dtype('float64'), self.dtype)
    result = self.data._new_like_me(dtype=real_dtype)

    get_norm_kernel(self.dtype, real_dtype)(self.data, result)
    return result