示例#1
0
    def __abs__(self):
        """Return a `GPUArray` of the absolute values of the elements
        of `self`.

        For a complex `self`, the result is allocated with the dtype that
        ``match_precision`` derives from ``float64`` and ``self.dtype``;
        otherwise the result has the same dtype as `self`.
        """

        if self.dtype == np.float32:
            fname = "fabsf"
        elif self.dtype == np.float64:
            fname = "fabs"
        else:
            fname = "abs"

        # Allocate the output exactly once, inside the branch that knows its
        # dtype, so the complex path does not create and then discard a
        # second array.
        if issubclass(self.dtype.type, np.complexfloating):
            from pytools import match_precision
            out_dtype = match_precision(np.dtype(np.float64), self.dtype)
            result = self._new_like_me(out_dtype)
        else:
            out_dtype = self.dtype
            result = self._new_like_me()

        func = elementwise.get_unary_func_kernel(fname, self.dtype,
                out_dtype=out_dtype)

        func.prepared_async_call(self._grid, self._block, None,
                self.gpudata, result.gpudata, self.mem_size)

        return result
示例#2
0
    def __init__(self, order, startup_stepper=None,
                 dtype=numpy.float64, rcon=None):
        """Initialize the Adams-Bashforth stepper state.

        :arg order: passed to ``make_ab_coefficients`` to build
            ``self.coefficients``.
        :arg startup_stepper: stored as ``self.startup_stepper``; when
            *None*, an ``LSRK4TimeStepper`` constructed with ``self.dtype``
            is used instead.
        :arg dtype: anything accepted by ``numpy.dtype``; stored as
            ``self.dtype``.
        :arg rcon: when not *None*, its ``make_timer`` is used in place of
            ``IntervalTimer`` to create ``self.timer``.
        """
        self.f_history = []

        from pytools import match_precision
        self.dtype = numpy.dtype(dtype)
        float64_dtype = numpy.dtype(numpy.float64)
        self.scalar_dtype = match_precision(float64_dtype, self.dtype)
        ab_coefficients = make_ab_coefficients(order)
        self.coefficients = numpy.asarray(ab_coefficients,
                                          dtype=self.scalar_dtype)

        if startup_stepper is None:
            from hedge.timestep.runge_kutta import LSRK4TimeStepper
            startup_stepper = LSRK4TimeStepper(self.dtype)
        self.startup_stepper = startup_stepper

        from pytools.log import IntervalTimer, EventCounter
        make_timer = IntervalTimer if rcon is None else rcon.make_timer

        self.timer = make_timer(
            "t_ab", "Time spent doing algebra in Adams-Bashforth")
        self.flop_counter = EventCounter(
            "n_flops_ab", "Floating point operations performed in AB")
示例#3
0
    def __init__(self, dtype=numpy.float64, rcon=None,
                 vector_primitive_factory=None):
        """Initialize the RK4 stepper state.

        :arg dtype: anything accepted by ``numpy.dtype``; stored as
            ``self.dtype``.
        :arg rcon: when not *None*, its ``make_timer`` is used in place of
            ``IntervalTimer`` to create ``self.timer``.
        :arg vector_primitive_factory: stored on the instance; when *None*,
            a default ``VectorPrimitiveFactory`` is created.
        """
        if vector_primitive_factory is not None:
            self.vector_primitive_factory = vector_primitive_factory
        else:
            from hedge.vector_primitives import VectorPrimitiveFactory
            self.vector_primitive_factory = VectorPrimitiveFactory()

        from pytools.log import IntervalTimer, EventCounter
        make_timer = IntervalTimer if rcon is None else rcon.make_timer

        self.timer = make_timer("t_rk4", "Time spent doing algebra in RK4")
        self.flop_counter = EventCounter(
            "n_flops_rk4", "Floating point operations performed in RK4")

        from pytools import match_precision
        self.dtype = numpy.dtype(dtype)
        float64_dtype = numpy.dtype(numpy.float64)
        self.scalar_dtype = match_precision(float64_dtype, self.dtype)

        # One row per coefficient set; transposed after conversion.
        coefficient_rows = [self._RK4A, self._RK4B, self._RK4C]
        self.coeffs = numpy.array(coefficient_rows,
                                  dtype=self.scalar_dtype).T
示例#4
0
def squared_norm(self):
    """Run the norm kernel over ``self.data`` and return its output array.

    The output array is created with ``_new_like_me`` using the dtype that
    ``match_precision`` yields for ``float64`` and the data's dtype.
    """
    data = self.data
    out_dtype = match_precision(np.dtype('float64'), data.dtype)
    result = data._new_like_me(dtype=out_dtype)
    norm_kernel = get_norm_kernel(data.dtype, out_dtype)
    norm_kernel(data, result)
    return result
示例#5
0
def squared_norm(self):
    """Apply the norm kernel to ``self.data`` and return the result array.

    The result is allocated like the input, but with the dtype that
    ``match_precision`` derives from ``float64`` and the input dtype.
    """
    src = self.data
    result_dtype = match_precision(np.dtype("float64"), src.dtype)
    dest = src._new_like_me(dtype=result_dtype)
    kernel = get_norm_kernel(src.dtype, result_dtype)
    kernel(src, dest)
    return dest
示例#6
0
    def __abs__(self):
        """Return a `GPUArray` of the absolute values of the elements
        of `self`.

        The result dtype equals ``self.dtype`` unless `self` is complex, in
        which case it is the dtype ``match_precision`` derives from
        ``float64`` and ``self.dtype``.
        """

        if self.dtype == np.float32:
            fname = "fabsf"
        elif self.dtype == np.float64:
            fname = "fabs"
        else:
            fname = "abs"

        if issubclass(self.dtype.type, np.complexfloating):
            from pytools import match_precision
            out_dtype = match_precision(np.dtype(np.float64), self.dtype)
            result = self._new_like_me(out_dtype)
        else:
            out_dtype = self.dtype
            # Allocated here rather than up front: an unconditional
            # allocation would be thrown away on the complex path.
            result = self._new_like_me()

        func = elementwise.get_unary_func_kernel(fname, self.dtype,
                out_dtype=out_dtype)

        func.prepared_async_call(self._grid, self._block, None,
                self.gpudata, result.gpudata, self.mem_size)

        return result
示例#7
0
文件: ab.py 项目: allansnielsen/hedge
    def __init__(self,
                 order,
                 startup_stepper=None,
                 dtype=numpy.float64,
                 rcon=None):
        """Set up coefficients, startup stepper and instrumentation.

        :arg order: handed to ``make_ab_coefficients``; the result becomes
            ``self.coefficients``.
        :arg startup_stepper: used as ``self.startup_stepper`` if given;
            otherwise an ``LSRK4TimeStepper`` is created from ``self.dtype``.
        :arg dtype: converted with ``numpy.dtype`` and kept as
            ``self.dtype``.
        :arg rcon: if given, ``rcon.make_timer`` creates ``self.timer``
            instead of ``IntervalTimer``.
        """
        self.f_history = []

        from pytools import match_precision
        self.dtype = numpy.dtype(dtype)
        reference_dtype = numpy.dtype(numpy.float64)
        self.scalar_dtype = match_precision(reference_dtype, self.dtype)
        raw_coefficients = make_ab_coefficients(order)
        self.coefficients = numpy.asarray(
            raw_coefficients, dtype=self.scalar_dtype)

        if startup_stepper is None:
            from hedge.timestep.runge_kutta import LSRK4TimeStepper
            self.startup_stepper = LSRK4TimeStepper(self.dtype)
        else:
            self.startup_stepper = startup_stepper

        from pytools.log import IntervalTimer, EventCounter
        timer_factory = rcon.make_timer if rcon is not None else IntervalTimer

        self.timer = timer_factory(
            "t_ab", "Time spent doing algebra in Adams-Bashforth")
        self.flop_counter = EventCounter(
            "n_flops_ab", "Floating point operations performed in AB")
示例#8
0
def simple_result_dtype_getter(vector_dtype_map, scalar_dtype_map,
                               const_dtypes):
    """Compute a result dtype from vector, scalar and constant dtypes.

    The common dtype of the vector dtypes is the starting point. If there
    are any scalar or constant dtypes, their common dtype is passed through
    ``match_precision`` against the vector result, and the common dtype of
    that and the vector result is returned.

    :arg vector_dtype_map: mapping whose values are the vector dtypes.
    :arg scalar_dtype_map: mapping whose values are the scalar dtypes.
    :arg const_dtypes: sequence of dtypes of constants.
    :returns: the dtype computed as described above.
    """
    from pytools import common_dtype, match_precision

    result = common_dtype(vector_dtype_map.values())

    # On Python 3, dict.values() is a view that does not support ``+``;
    # build real lists before concatenating.
    scalar_dtypes = list(scalar_dtype_map.values()) + list(const_dtypes)
    if scalar_dtypes:
        prec_matched_scalar_dtype = match_precision(
            common_dtype(scalar_dtypes), dtype_to_match=result)
        result = common_dtype([result, prec_matched_scalar_dtype])

    return result
示例#9
0
def simple_result_dtype_getter(vector_dtype_map, scalar_dtype_map, const_dtypes):
    """Return the result dtype for the given vector/scalar/constant dtypes.

    Starts from the common dtype of all values in *vector_dtype_map*. When
    *scalar_dtype_map* or *const_dtypes* contribute any dtypes, their common
    dtype is run through ``match_precision`` with the vector result as
    ``dtype_to_match`` and then combined with the vector result via
    ``common_dtype``.

    :arg vector_dtype_map: mapping whose values are the vector dtypes.
    :arg scalar_dtype_map: mapping whose values are the scalar dtypes.
    :arg const_dtypes: sequence of dtypes of constants.
    """
    from pytools import common_dtype, match_precision

    result = common_dtype(vector_dtype_map.values())

    # ``dict.values() + list`` only works on Python 2, where values() is a
    # list; converting both sides keeps this working on Python 3 as well.
    scalar_dtypes = list(scalar_dtype_map.values()) + list(const_dtypes)
    if scalar_dtypes:
        prec_matched_scalar_dtype = match_precision(
                common_dtype(scalar_dtypes),
                dtype_to_match=result)
        result = common_dtype([result, prec_matched_scalar_dtype])

    return result
示例#10
0
    def real(self):
        """Return the output of the "real" kernel for a complex array.

        A non-complex array is returned unchanged (the same object). For a
        complex array, a new array is allocated with the dtype that
        ``match_precision`` derives from ``float64`` and ``self.dtype``, and
        the kernel from ``elementwise.get_real_kernel`` fills it.
        """
        own_dtype = self.dtype
        if not issubclass(own_dtype.type, np.complexfloating):
            return self

        from pytools import match_precision
        out_dtype = match_precision(np.dtype(np.float64), own_dtype)
        out = self._new_like_me(dtype=out_dtype)

        kernel = elementwise.get_real_kernel(own_dtype, out_dtype)
        kernel.prepared_async_call(
            self._grid, self._block, None,
            self.gpudata, out.gpudata, self.mem_size)

        return out
示例#11
0
    def real(self):
        """Return `self` if it is not complex, else a new array filled by
        the kernel obtained from ``elementwise.get_real_kernel``.

        The new array's dtype is what ``match_precision`` returns for
        ``float64`` and ``self.dtype``.
        """
        src_dtype = self.dtype
        is_complex = issubclass(src_dtype.type, np.complexfloating)
        if not is_complex:
            return self

        from pytools import match_precision
        target_dtype = match_precision(np.dtype(np.float64), src_dtype)
        target = self._new_like_me(dtype=target_dtype)

        real_kernel = elementwise.get_real_kernel(src_dtype, target_dtype)
        real_kernel.prepared_async_call(
                self._grid, self._block, None,
                self.gpudata, target.gpudata, self.mem_size)
        return target
示例#12
0
    def bind(self, discr):
        """Compile the operator template on *discr* and return a callable.

        The returned function takes ``u`` and invokes the compiled operator
        with ``kappa``, ``eps0`` and ``s_0`` converted to the scalar type
        that ``match_precision`` derives from ``float64`` and
        ``discr.default_scalar_type``.
        """
        compiled_op = discr.compile(self.op_template())

        from pytools import match_precision
        matched_dtype = match_precision(
                numpy.dtype(numpy.float64), discr.default_scalar_type)
        to_scalar = matched_dtype.type

        # Convert the parameters once, up front, rather than on every call.
        params = dict(
                kappa=to_scalar(self.kappa),
                eps0=to_scalar(self.eps0),
                s_0=to_scalar(self.s_0))

        def apply(u):
            return compiled_op(u=u, **params)

        return apply
示例#13
0
    def __init__(self, use_high_order=True, dtype=numpy.float64, rcon=None,
                 vector_primitive_factory=None, atol=0, rtol=0,
                 max_dt_growth=5, min_dt_shrinkage=0.1, limiter=None):
        """Store the configuration of this Runge-Kutta stepper.

        :arg use_high_order: stored as ``self.use_high_order``.
        :arg dtype: anything accepted by ``numpy.dtype``; stored as
            ``self.dtype``.
        :arg rcon: when not *None*, its ``make_timer`` is used in place of
            ``IntervalTimer`` to create ``self.timer``.
        :arg vector_primitive_factory: stored on the instance; when *None*,
            a default ``VectorPrimitiveFactory`` is created.
        :arg atol: stored as ``self.atol``.
        :arg rtol: stored as ``self.rtol``. ``self.adaptive`` is true when
            either *atol* or *rtol* is truthy.
        :arg max_dt_growth: stored as ``self.max_dt_growth``.
        :arg min_dt_shrinkage: stored as ``self.min_dt_shrinkage``.
        :arg limiter: callable stored as ``self.limiter``; when *None*, an
            identity function is used.
        """
        if vector_primitive_factory is not None:
            self.vector_primitive_factory = vector_primitive_factory
        else:
            from hedge.vector_primitives import VectorPrimitiveFactory
            self.vector_primitive_factory = VectorPrimitiveFactory()

        from pytools.log import IntervalTimer, EventCounter
        make_timer = IntervalTimer if rcon is None else rcon.make_timer

        # Without a limiter, fall back to the identity function.
        self.limiter = (lambda x: x) if limiter is None else limiter

        self.timer = make_timer(
            "t_rk", "Time spent doing algebra in Runge-Kutta")
        self.flop_counter = EventCounter(
            "n_flops_rk", "Floating point operations performed in Runge-Kutta")

        self.use_high_order = use_high_order

        self.dtype = numpy.dtype(dtype)

        self.adaptive = bool(atol or rtol)
        self.atol = atol
        self.rtol = rtol

        from pytools import match_precision
        float64_dtype = numpy.dtype(numpy.float64)
        self.scalar_dtype = match_precision(float64_dtype, self.dtype)

        self.max_dt_growth = max_dt_growth
        self.min_dt_shrinkage = min_dt_shrinkage

        self.linear_combiner_cache = {}
示例#14
0
    def imag(self):
        """Return the output of the "imag" kernel for a complex array.

        For a non-complex array, ``zeros_like(self)`` is returned. For a
        complex array, a new array is allocated with the dtype that
        ``match_precision`` derives from ``float64`` and ``self.dtype``, and
        the kernel from ``elementwise.get_imag_kernel`` fills it.
        """
        if not issubclass(self.dtype.type, numpy.complexfloating):
            return zeros_like(self)

        own_dtype = self.dtype

        from pytools import match_precision
        out_dtype = match_precision(numpy.dtype(numpy.float64), own_dtype)
        out = self._new_like_me(dtype=out_dtype)

        kernel = elementwise.get_imag_kernel(own_dtype, out_dtype)
        # The block shape is set on the kernel before launching it.
        kernel.set_block_shape(*self._block)
        kernel.prepared_async_call(
                self._grid, None,
                self.gpudata, out.gpudata, self.mem_size)

        return out
示例#15
0
    def imag(self):
        """Return the output of the "imag" kernel for a complex array.

        For a non-complex array, ``zeros_like(self)`` is returned. A complex
        array must satisfy ``self.flags.forc``; a new array with the dtype
        from ``match_precision`` is then filled by the kernel obtained from
        ``elementwise.get_imag_kernel``.

        :raises RuntimeError: if `self` is complex and ``self.flags.forc``
            is false.
        """
        if not issubclass(self.dtype.type, np.complexfloating):
            return zeros_like(self)

        own_dtype = self.dtype
        if not self.flags.forc:
            raise RuntimeError(
                "only contiguous arrays may be used as arguments to this operation")

        from pytools import match_precision

        out_dtype = match_precision(np.dtype(np.float64), own_dtype)
        out = self._new_like_me(dtype=out_dtype)

        kernel = elementwise.get_imag_kernel(own_dtype, out_dtype)
        kernel.prepared_async_call(
            self._grid, self._block, None,
            self.gpudata, out.gpudata, self.mem_size)

        return out
示例#16
0
    def test_complex_bits(self):
        """Check ``real``, ``imag`` and ``conj`` of random complex arrays
        against numpy on the host copy, including ``conj(out=...)`` and the
        contiguity flags of the results for both "C" and "F" order.
        """
        from pycuda.curandom import rand as curand

        # complex128 is only exercised when double support is reported.
        dtypes = [np.complex64]
        if has_double_support():
            dtypes.append(np.complex128)

        n = 20
        for tp in dtypes:
            dtype = np.dtype(tp)
            from pytools import match_precision

            real_dtype = match_precision(np.dtype(np.float64), dtype)

            re_part = curand((n,), real_dtype).astype(dtype)
            im_part = curand((n,), real_dtype).astype(dtype)
            z = re_part + 1j * im_part

            assert la.norm(z.get().real - z.real.get()) == 0
            assert la.norm(z.get().imag - z.imag.get()) == 0
            assert la.norm(z.get().conj() - z.conj().get()) == 0

            # verify conj with out parameter
            z_out = z.astype(np.complex64)
            assert z_out is z.conj(out=z_out)
            assert la.norm(z.get().conj() - z_out.get()) < 1e-7

            # verify contiguity is preserved
            for order in ["C", "F"]:
                flag_name = "c_contiguous" if order == "C" else "f_contiguous"

                # test both zero and non-zero value code paths
                z_real = gpuarray.zeros(z.shape, dtype=real_dtype, order=order)
                z2 = z.reshape(z.shape, order=order)
                for zdata in [z_real, z2]:
                    assert getattr(zdata.flags, flag_name)
                    assert getattr(zdata.real.flags, flag_name)
                    assert getattr(zdata.imag.flags, flag_name)
                    assert getattr(zdata.conj().flags, flag_name)
示例#17
0
    def imag(self):
        """Return ``zeros_like(self)`` for a non-complex array, else a new
        array filled by the kernel from ``elementwise.get_imag_kernel``.

        The new array's dtype is what ``match_precision`` returns for
        ``float64`` and ``self.dtype``.

        :raises RuntimeError: if `self` is complex and ``self.flags.forc``
            is false.
        """
        if not issubclass(self.dtype.type, np.complexfloating):
            return zeros_like(self)

        src_dtype = self.dtype
        if not self.flags.forc:
            message = ("only contiguous arrays may "
                    "be used as arguments to this operation")
            raise RuntimeError(message)

        from pytools import match_precision
        target_dtype = match_precision(np.dtype(np.float64), src_dtype)
        target = self._new_like_me(dtype=target_dtype)

        imag_kernel = elementwise.get_imag_kernel(src_dtype, target_dtype)
        imag_kernel.prepared_async_call(
                self._grid, self._block, None,
                self.gpudata, target.gpudata, self.mem_size)
        return target
示例#18
0
    def test_complex_bits(self):
        """Check ``real``, ``imag`` and ``conj()`` of random complex device
        arrays against numpy's results on the host copy.
        """
        from pycuda.curandom import rand as curand

        # complex128 is only exercised when double support is reported.
        dtypes = [np.complex64]
        if has_double_support():
            dtypes.append(np.complex128)

        n = 20
        for tp in dtypes:
            dtype = np.dtype(tp)
            from pytools import match_precision
            real_dtype = match_precision(np.dtype(np.float64), dtype)

            re_part = curand((n, ), real_dtype).astype(dtype)
            im_part = curand((n, ), real_dtype).astype(dtype)
            z = re_part + 1j * im_part

            assert la.norm(z.get().real - z.real.get()) == 0
            assert la.norm(z.get().imag - z.imag.get()) == 0
            assert la.norm(z.get().conj() - z.conj().get()) == 0
示例#19
0
    def test_complex_bits(self):
        """Verify that the device-side ``real``, ``imag`` and ``conj()``
        agree exactly with numpy for randomly generated complex arrays.
        """
        from pycuda.curandom import rand as curand

        complex_types = [np.complex64]
        if has_double_support():
            # Only include double-precision complex when it is supported.
            complex_types.append(np.complex128)

        n = 20
        for tp in complex_types:
            dtype = np.dtype(tp)
            from pytools import match_precision
            real_dtype = match_precision(np.dtype(np.float64), dtype)

            first = curand((n,), real_dtype).astype(dtype)
            second = curand((n,), real_dtype).astype(dtype)
            z = first + 1j*second

            assert la.norm(z.get().real - z.real.get()) == 0
            assert la.norm(z.get().imag - z.imag.get()) == 0
            assert la.norm(z.get().conj() - z.conj().get()) == 0
示例#20
0
    def __init__(self,
                 use_high_order=True,
                 dtype=numpy.float64,
                 rcon=None,
                 vector_primitive_factory=None,
                 atol=0,
                 rtol=0,
                 max_dt_growth=5,
                 min_dt_shrinkage=0.1,
                 limiter=None):
        """Record stepper options and set up timers and counters.

        :arg use_high_order: kept as ``self.use_high_order``.
        :arg dtype: converted with ``numpy.dtype`` and kept as
            ``self.dtype``.
        :arg rcon: if given, ``rcon.make_timer`` creates ``self.timer``
            instead of ``IntervalTimer``.
        :arg vector_primitive_factory: kept on the instance; a default
            ``VectorPrimitiveFactory`` is created when *None*.
        :arg atol: kept as ``self.atol``.
        :arg rtol: kept as ``self.rtol``; ``self.adaptive`` is
            ``bool(atol or rtol)``.
        :arg max_dt_growth: kept as ``self.max_dt_growth``.
        :arg min_dt_shrinkage: kept as ``self.min_dt_shrinkage``.
        :arg limiter: callable kept as ``self.limiter``; an identity
            function is used when *None*.
        """
        if vector_primitive_factory is not None:
            self.vector_primitive_factory = vector_primitive_factory
        else:
            from hedge.vector_primitives import VectorPrimitiveFactory
            self.vector_primitive_factory = VectorPrimitiveFactory()

        from pytools.log import IntervalTimer, EventCounter
        timer_factory = rcon.make_timer if rcon is not None else IntervalTimer

        if limiter is not None:
            self.limiter = limiter
        else:
            # No limiter supplied: pass values through unchanged.
            self.limiter = lambda x: x

        self.timer = timer_factory(
            "t_rk", "Time spent doing algebra in Runge-Kutta")
        self.flop_counter = EventCounter(
            "n_flops_rk", "Floating point operations performed in Runge-Kutta")

        self.use_high_order = use_high_order

        self.dtype = numpy.dtype(dtype)

        self.adaptive = bool(atol or rtol)
        self.atol = atol
        self.rtol = rtol

        from pytools import match_precision
        reference_dtype = numpy.dtype(numpy.float64)
        self.scalar_dtype = match_precision(reference_dtype, self.dtype)

        self.max_dt_growth = max_dt_growth
        self.min_dt_shrinkage = min_dt_shrinkage

        self.linear_combiner_cache = {}
示例#21
0
    def test_complex_bits(self):
        """Check ``real``, ``imag`` and ``conj()`` of random complex device
        arrays against numpy on the host copy, and check that the results
        keep the contiguity flag matching the array's order ("C" or "F").
        """
        from pycuda.curandom import rand as curand

        if has_double_support():
            dtypes = [np.complex64, np.complex128]
        else:
            dtypes = [np.complex64]

        n = 20
        for tp in dtypes:
            dtype = np.dtype(tp)
            from pytools import match_precision
            real_dtype = match_precision(np.dtype(np.float64), dtype)

            z = (curand((n,), real_dtype).astype(dtype)
                    + 1j*curand((n,), real_dtype).astype(dtype))

            assert la.norm(z.get().real - z.real.get()) == 0
            assert la.norm(z.get().imag - z.imag.get()) == 0
            assert la.norm(z.get().conj() - z.conj().get()) == 0

            # verify contiguity is preserved
            for order in ["C", "F"]:
                # test both zero and non-zero value code paths
                z_real = gpuarray.zeros(z.shape, dtype=real_dtype,
                                        order=order)
                z2 = z.reshape(z.shape, order=order)
                for zdata in [z_real, z2]:
                    # Assert truthiness directly instead of comparing the
                    # flags to True with ``==``.
                    if order == "C":
                        assert zdata.flags.c_contiguous
                        assert zdata.real.flags.c_contiguous
                        assert zdata.imag.flags.c_contiguous
                        assert zdata.conj().flags.c_contiguous
                    elif order == "F":
                        assert zdata.flags.f_contiguous
                        assert zdata.real.flags.f_contiguous
                        assert zdata.imag.flags.f_contiguous
                        assert zdata.conj().flags.f_contiguous
示例#22
0
    def __init__(self,
                 dtype=numpy.float64,
                 rcon=None,
                 vector_primitive_factory=None):
        """Set up instrumentation and the coefficient table for RK4.

        :arg dtype: converted with ``numpy.dtype`` and kept as
            ``self.dtype``.
        :arg rcon: if given, ``rcon.make_timer`` creates ``self.timer``
            instead of ``IntervalTimer``.
        :arg vector_primitive_factory: kept on the instance; a default
            ``VectorPrimitiveFactory`` is created when *None*.
        """
        if vector_primitive_factory is not None:
            self.vector_primitive_factory = vector_primitive_factory
        else:
            from hedge.vector_primitives import VectorPrimitiveFactory
            self.vector_primitive_factory = VectorPrimitiveFactory()

        from pytools.log import IntervalTimer, EventCounter
        timer_factory = rcon.make_timer if rcon is not None else IntervalTimer

        self.timer = timer_factory(
            "t_rk4", "Time spent doing algebra in RK4")
        self.flop_counter = EventCounter(
            "n_flops_rk4", "Floating point operations performed in RK4")

        from pytools import match_precision
        self.dtype = numpy.dtype(dtype)
        reference_dtype = numpy.dtype(numpy.float64)
        self.scalar_dtype = match_precision(reference_dtype, self.dtype)

        # Stack the three coefficient sequences as rows, then transpose.
        stacked = numpy.array(
            [self._RK4A, self._RK4B, self._RK4C], dtype=self.scalar_dtype)
        self.coeffs = stacked.T
示例#23
0
def squared_norm(self):
    """Run the norm kernel over ``self.data`` and return its output array.

    The output is allocated from ``self.data`` via ``_new_like_me`` with the
    dtype that ``match_precision`` yields for ``float64`` and ``self.dtype``.
    """
    result_dtype = match_precision(np.dtype('float64'), self.dtype)
    result = self.data._new_like_me(dtype=result_dtype)
    norm_kernel = get_norm_kernel(self.dtype, result_dtype)
    norm_kernel(self.data, result)
    return result