示例#1
0
def get_MH_Eval(k):
    """
    Return the numba-compiled kernel for parameter k, building it on first use.

    The kernel has signature func(sx, sy, tx, ty) and evaluates a k0
    function at k * d, where d is the Euclidean distance between (sx, sy)
    and (tx, ty).  Kernels are cached per k in MH_eval_functions, so each
    distinct k is only built once.

    A FunctionGenerator representation of scipy.special.k0 (bounds 0 and
    1000, tol=1e-14) is tried first.  If setting that up fails for any
    ordinary reason (e.g. function_generator is not installed), the kernel
    falls back to _numba_k0 instead.
    """
    if k not in MH_eval_functions:
        try:
            print('trying fast')
            from function_generator import FunctionGenerator
            from scipy.special import k0
            fast_k0 = FunctionGenerator(k0,
                                        0.0,
                                        1000.0,
                                        tol=1e-14,
                                        verbose=True)
            _fast_k0 = fast_k0.get_base_function(check=False)

            @numba.njit(fastmath=True)
            def func(sx, sy, tx, ty):
                dx = tx - sx
                dy = ty - sy
                d = np.sqrt(dx * dx + dy * dy)
                return _fast_k0(k * d)

            print('fast success')
        except Exception:
            # Deliberate best-effort fallback.  Catch Exception rather than
            # using a bare except so that KeyboardInterrupt / SystemExit
            # still propagate instead of silently selecting the slow path.

            @numba.njit(fastmath=True)
            def func(sx, sy, tx, ty):
                dx = tx - sx
                dy = ty - sy
                d = np.sqrt(dx * dx + dy * dy)
                return _numba_k0(k * d)

        MH_eval_functions[k] = func
    return MH_eval_functions[k]
示例#2
0
                     error_model=error_model)
    build_time = time.time() - st
    fa = approx_func(xtest, check_bounds=False)
    out = np.empty(n, dtype=fa.dtype)
    st = time.time()
    fa = approx_func(xtest, check_bounds=False, out=out)
    approx_func_time1 = time.time() - st

    # test approximation function with checks
    fa = approx_func(xtest, check_bounds=True)
    st = time.time()
    fa1 = approx_func(xtest, check_bounds=True)
    approx_func_time2 = time.time() - st

    # extract serial function, and compile it
    base_func = approx_func.get_base_function(check=False)

    @numba.njit(parallel=True, fastmath=True)
    def func_eval(xs, out):
        for i in numba.prange(xs.size):
            out[i] = base_func(xs[i])

    fa2 = np.empty_like(fa)
    func_eval(xtest, fa2)
    st = time.time()
    func_eval(xtest, fa2)
    approx_func_time3 = time.time() - st

    aerr = np.abs(fa - ft)
    rerr1 = np.abs(fa - ft) / np.abs(ft)
    scale = np.abs(ft)
示例#3
0
If this fails, no comparison for correctness!

On my MacBook Pro, N=50,000 takes ~7s with the direct method and <1s with the FMM
(with N_equiv=64, N_cutoff=500),
and gives error <5e-14
"""

# Use half of the reported CPUs, but never fewer than one.
# os.cpu_count() may return None when the count cannot be determined, and
# halving a count of 1 would otherwise yield 0.
cpu_num = max(1, (os.cpu_count() or 1) // 2)

# Factor multiplying the source-target distance inside the kernel.
helmholtz_k = 5.0

# FunctionGenerator representations of scipy.special.k0 and k1, both built
# with the same bounds, tolerance and n
k0, k1 = (FunctionGenerator(bessel_fn, a=1e-20, b=1000, tol=1e-8, n=8)
          for bessel_fn in (scipy.special.k0, scipy.special.k1))
# compilable base functions extracted from the generators
_k0, _k1 = k0.get_base_function(), k1.get_base_function()


# Kernel
@numba.njit(fastmath=True)
def Eval(sx, sy, tx, ty):
    """
    Kernel value between a source (sx, sy) and a target (tx, ty):
    _k0(helmholtz_k * distance) scaled by 1 / (2 * pi).
    """
    inv_two_pi = 1.0 / (2 * np.pi)
    x_off = tx - sx
    y_off = ty - sy
    r = np.sqrt(x_off**2 + y_off**2)
    return _k0(helmholtz_k * r) * inv_two_pi


# Kernel
@numba.njit(fastmath=True)
示例#4
0
To compare to
If this fails, no comparison for correctness!

On my MacBook Pro, N=50,000 takes ~7s with the direct method and <1s with the FMM
(with N_equiv=64, N_cutoff=500),
and gives error <5e-14
"""

# Use half of the reported CPUs, but never fewer than one.
# os.cpu_count() may return None when the count cannot be determined, and
# halving a count of 1 would otherwise yield 0.
cpu_num = max(1, (os.cpu_count() or 1) // 2)

# Factor multiplying the source-target distance inside the kernel.
helmholtz_k = 5.0

# FunctionGenerator representation of scipy.special.k0 (fast Green's function)
k0 = FunctionGenerator(
    scipy.special.k0,
    a=1e-20,
    b=1000,
    tol=1e-12,
    n=12,
)
# compilable base function extracted from the generator
_k0 = k0.get_base_function()

# Modified Helmholtz Kernel
@numba.njit(fastmath=True)
def Modified_Helmholtz_Eval(sx, sy, tx, ty):
    """
    Kernel value between a source (sx, sy) and a target (tx, ty):
    _k0(helmholtz_k * distance) scaled by 1 / (2 * pi).
    """
    inv_two_pi = 1.0/(2*np.pi)
    x_off = tx-sx
    y_off = ty-sy
    r = np.sqrt(x_off**2 + y_off**2)
    return _k0(helmholtz_k*r)*inv_two_pi

# number of source and target points
N_source = 20 * 1000
N_target = 2000 * 1000
# point layout: clustered or circle or uniform
test = 'circle'
reference_precision = 4
示例#5
0
class ScalarGridBackend(object):
    """
    Reusable backend holding the local (near-field) pieces of a grid-based
    'ewald' sum for a scalar kernel.

    Construction builds two compiled radial functions on [0, spread_width*h];
    initialize_periodic / initialize_freespace then compile the evaluators
    that spread sources onto a grid using those functions.
    """

    def __init__(self,
                 gf,
                 fs,
                 ifs,
                 h,
                 spread_width,
                 kernel_kwargs=None,
                 funcgen_tol=1e-10,
                 inline_core=True):
        """
        Backend class for re-usable 'ewald' sum grid evaluation
            for reusability, the grid must have the same h
            and the ewald sum must use the same spread width
        gf: numba callable greens function, gf(r)
        fs: Fourier symbol for operator, fs(kx, ky)
        ifs: Inverse Fourier symbol for operator, ifs(kx, ky)
        h: grid spacing
        spread_width: width to do spreading on
            for Laplace, 15 gives ~7 digits
                         20 gives ~10 digits
                can't seem to do much better than that, right now
        kernel_kwargs: dict of arguments to be passed to gf, fs, ifs, tsgf functions
        funcgen_tol: tolerance for function generator representation of
            functions used in interior spread function.  can't seem to beat
            ~10 digits overall now, so no real reason to do more than that
        inline_core: whether to inline the function generator functions into
            the compiled ewald functions
            (inlining may speed things up but slows compilation time,
            sometimes dramatically)

        Raises Exception if either FunctionGenerator cannot be constructed;
        the underlying error is attached as the exception's __cause__.
        """
        self.kernel_kwargs = {} if kernel_kwargs is None else kernel_kwargs
        # bind the kernel keyword arguments into the user supplied callables
        self.gf = lambda r: gf(r, **self.kernel_kwargs)
        self.fourier_symbol = lambda kx, ky: fs(kx, ky, **self.kernel_kwargs)
        self.inverse_fourier_symbol = lambda kx, ky: ifs(
            kx, ky, **self.kernel_kwargs)
        self.h = h
        self.spread_width = spread_width
        self.funcgen_tol = funcgen_tol
        self.inline_core = inline_core
        # construct mollifier
        self.mollifier = SlepianMollifier(2 * self.spread_width)
        # physical spreading radius: spread_width grid cells
        self.ssw = self.spread_width * self.h
        # screened greens function
        _excisor_gf = lambda d: excisor(d, 0.0, self.ssw, self.mollifier
                                        ) * self.gf(d)
        try:
            self.ex_funcgen = FunctionGenerator(_excisor_gf,
                                                0.0,
                                                self.ssw,
                                                tol=self.funcgen_tol,
                                                inline_core=self.inline_core)
            self.excisor_gf = self.ex_funcgen.get_base_function(check=False)
        except Exception as err:
            # keep the original failure attached so it can be diagnosed
            raise Exception(
                'Failed constructing FunctionGenerator function for mollifier'
            ) from err
        # construct differential operator applied to residual of screened greens function
        # work grid: _sn points per direction with spacing h, covering [0, 4*ssw)
        _sn = 4 * self.spread_width
        _sgv = np.linspace(0, 4 * self.ssw, _sn, endpoint=False)
        _sgx, _sgy = np.meshgrid(_sgv, _sgv, indexing='ij')
        _skv = np.fft.fftfreq(_sn, self.h / (2 * np.pi))
        _skx, _sky = np.meshgrid(_skv, _skv, indexing='ij')
        _slap = self.fourier_symbol(_skx, _sky)
        # single unit-strength source at the centre of the work grid
        pt = np.array([[2 * self.ssw], [2 * self.ssw]])
        # np.vstack replaces the deprecated alias np.row_stack
        targ = np.vstack([_sgx.ravel(), _sgy.ravel()])
        u = gf_apply(self.gf, pt[0], pt[1], targ[0], targ[1], np.array([
            1.0,
        ])).reshape(_sn, _sn)
        # overwrite the value at the grid node that coincides with the source
        u[_sn // 2, _sn // 2] = 0.0
        dist = np.hypot(_sgx - 2 * self.ssw, _sgy - 2 * self.ssw)
        dec1 = excisor(dist, 0.0, self.ssw, self.mollifier)
        dec2 = excisor(dist, self.ssw, 2 * self.ssw, self.mollifier)
        uf = u * (1 - dec1) * dec2
        # apply the operator's Fourier symbol spectrally
        self.do_ufd = ifft2(fft2(uf) * _slap).real
        # get an interpolater for this
        # NOTE(review): with the work grid mapped onto [0, 2*pi), the points
        # (pi .. 1.5*pi, pi) appear to run from the grid centre out to a
        # distance ssw, matching _ar -- confirm against finufft conventions
        _ax = np.linspace(np.pi, 1.5 * np.pi, 1000)
        _ay = np.repeat(np.pi, _ax.size)
        _ar = np.linspace(0, self.ssw, _ax.size)
        _fh = fft2(self.do_ufd) / (_sn * _sn)
        out = finufft.nufft2d2(_ax, _ay, _fh, isign=1, eps=1e-15, modeord=1)
        self._do_ufd_interpolater = sp.interpolate.InterpolatedUnivariateSpline(
            _ar, out.real, k=5, bbox=[0, self.ssw], ext=1)
        try:
            self.do_ufd_funcgen = FunctionGenerator(
                self._do_ufd_interpolater,
                0.0,
                self.ssw,
                tol=self.funcgen_tol,
                inline_core=self.inline_core)
            self.do_ufd_interpolater = self.do_ufd_funcgen.get_base_function(
                check=False)
        except Exception as err:
            # keep the original failure attached so it can be diagnosed
            raise Exception(
                'Failed constructing FunctionGenerator function for laplacian of greens function times mollifier'
            ) from err

    def initialize_periodic(self):
        """
        Define periodic local evaluator function

        Stores the compiled function as self.ewald_local_periodic, with
        signature (source, charge, xv, yv) -> (op, u).  source is indexed as
        source[0, i], source[1, i]; op and u are arrays of shape
        (xv.size, yv.size).  Grid indices are wrapped modulo the grid size.
        """
        _ex_gf = self.excisor_gf
        _do_ufd = self.do_ufd_interpolater
        h = self.h
        sw = self.spread_width

        @numba.njit(parallel=True, fastmath=True)
        def ewald_local_periodic(source, charge, xv, yv):
            xmin = xv[0]
            ymin = yv[0]
            # per-source scratch: one (2*sw+2) x (2*sw+2) window for each source
            shape = (charge.size, 2 * sw + 2, 2 * sw + 2)
            fwork1 = np.empty(shape, dtype=numba.float64)
            fwork2 = np.empty(shape, dtype=numba.float64)
            iwork1 = np.empty(shape, dtype=numba.int64)
            iwork2 = np.empty(shape, dtype=numba.int64)
            # marks which window entries were filled (within distance md)
            bwork1 = np.zeros(shape, dtype=numba.boolean)
            sh = (xv.size, yv.size)
            op = np.zeros(sh, dtype=numba.float64)
            u = np.zeros_like(op)
            N = source.shape[1]
            nx = xv.size
            ny = yv.size
            md = sw * h
            # pass 1 (parallel): each source writes only to its own scratch slab
            for i in numba.prange(N):
                sx = source[0, i]
                sy = source[1, i]
                ch = charge[i]
                indx = int((sx - xmin) // h)
                indy = int((sy - ymin) // h)
                lxi = indx - sw - 1
                lyi = indy - sw - 1
                hxi = indx + sw + 1
                hyi = indy + sw + 1
                for ixind, ix in enumerate(range(lxi, hxi)):
                    # wrapped index for storage, unwrapped coordinate for distance
                    ixm = ix % nx
                    xvh = xmin + ix * h
                    for iyind, iy in enumerate(range(lyi, hyi)):
                        iym = iy % ny
                        yvh = ymin + iy * h
                        d = np.hypot(xvh - sx, yvh - sy)
                        if d <= md:
                            fwork1[i, ixind, iyind] = _ex_gf(d) * ch
                            fwork2[i, ixind, iyind] = _do_ufd(d) * ch
                            iwork1[i, ixind, iyind] = ixm
                            iwork2[i, ixind, iyind] = iym
                            bwork1[i, ixind, iyind] = True
            # pass 2 (serial): accumulate onto the shared grids; presumably
            # serial because windows of different sources can overlap
            for i in range(N):
                for ixind in range(2 * sw + 2):
                    for iyind in range(2 * sw + 2):
                        if bwork1[i, ixind, iyind]:
                            ixm = iwork1[i, ixind, iyind]
                            iym = iwork2[i, ixind, iyind]
                            u[ixm, iym] += fwork1[i, ixind, iyind]
                            op[ixm, iym] += fwork2[i, ixind, iyind]
            return op, u

        self.ewald_local_periodic = ewald_local_periodic

    def initialize_freespace(self):
        """
        Define freespace local evaluator function

        Stores the compiled function as self.ewald_local_freespace, with
        signature (source, charge, xv, yv, op, u, op_na).  It returns
        nothing: contributions are added in place to the caller-supplied op
        and u arrays.  Grid indices are shifted by op_na and are not wrapped.
        NOTE(review): presumably op_na is the padding offset of op/u relative
        to the (xv, yv) grid -- confirm with the caller.
        """
        _ex_gf = self.excisor_gf
        _do_ufd = self.do_ufd_interpolater
        h = self.h
        sw = self.spread_width

        @numba.njit(parallel=True)
        def ewald_local_freespace(source, charge, xv, yv, op, u, op_na):
            xmin = xv[0]
            ymin = yv[0]
            # per-source scratch: one (2*sw+2) x (2*sw+2) window for each source
            shape = (charge.size, 2 * sw + 2, 2 * sw + 2)
            fwork1 = np.empty(shape, dtype=numba.float64)
            fwork2 = np.empty(shape, dtype=numba.float64)
            iwork1 = np.empty(shape, dtype=numba.int64)
            iwork2 = np.empty(shape, dtype=numba.int64)
            # marks which window entries were filled (within distance md)
            bwork1 = np.zeros(shape, dtype=numba.boolean)
            N = source.shape[1]
            nx = xv.size
            ny = yv.size
            md = sw * h
            # pass 1 (parallel): each source writes only to its own scratch slab
            for i in numba.prange(N):
                sx = source[0, i]
                sy = source[1, i]
                ch = charge[i]
                indx = int((sx - xmin) // h)
                indy = int((sy - ymin) // h)
                lxi = indx - sw - 1
                lyi = indy - sw - 1
                hxi = indx + sw + 1
                hyi = indy + sw + 1
                for ixind, ix in enumerate(range(lxi, hxi)):
                    xvh = xmin + ix * h
                    for iyind, iy in enumerate(range(lyi, hyi)):
                        yvh = ymin + iy * h
                        d = np.hypot(xvh - sx, yvh - sy)
                        if d <= md:
                            fwork1[i, ixind, iyind] = _ex_gf(d) * ch
                            fwork2[i, ixind, iyind] = _do_ufd(d) * ch
                            iwork1[i, ixind, iyind] = ix + op_na
                            iwork2[i, ixind, iyind] = iy + op_na
                            bwork1[i, ixind, iyind] = True
            # pass 2 (serial): accumulate onto the caller's arrays; presumably
            # serial because windows of different sources can overlap
            for i in range(N):
                for ixind in range(2 * sw + 2):
                    for iyind in range(2 * sw + 2):
                        if bwork1[i, ixind, iyind]:
                            ix = iwork1[i, ixind, iyind]
                            iy = iwork2[i, ixind, iyind]
                            u[ix, iy] += fwork1[i, ixind, iyind]
                            op[ix, iy] += fwork2[i, ixind, iyind]

        self.ewald_local_freespace = ewald_local_freespace

    def check_periodic(self, xv, yv):
        """
        Validate grid vectors for the periodic evaluator (spacing check only)
        """
        self.check_either(xv, yv)

    def check_freespace(self, xv, yv):
        """
        Validate grid vectors for the freespace evaluator: spacing must match
        the backend and xv, yv must have the same size
        """
        self.check_either(xv, yv)
        if xv.size != yv.size:
            raise Exception('Square grid required for freespace evaluator')

    def check_either(self, xv, yv):
        """
        Raise Exception unless the spacing of the first two entries of both
        xv and yv equals the backend's h to within 1e-15
        """
        xh = xv[1] - xv[0]
        if np.abs(xh - self.h) > 1e-15:
            raise Exception('h of input xv vector not same as backend')
        yh = yv[1] - yv[0]
        if np.abs(yh - self.h) > 1e-15:
            raise Exception('h of input yv vector not same as backend')
# FunctionGenerator representations of k0 and k1, both built with bounds
# 0 and 200 and identical tolerance settings
fk0, fk1 = (FunctionGenerator(special_fn,
                              0,
                              200,
                              tol=1e-14,
                              n=8,
                              mw=1e-15,
                              error_model=relative_error_model)
            for special_fn in (k0, k1))

# base functions extracted from the generators
_fk0, _fk1 = fk0.get_base_function(), fk1.get_base_function()


@numba.njit()
def _fg_k0(x):
    """
    Evaluate _fk0 at x, returning 0.0 instead for arguments above 200
    """
    return 0.0 if x > 200 else _fk0(x)


@numba.njit()
def _fg_k1(x):
    if x > 200:
        return 0.0
示例#7
0
import mkl
# Set MKL's thread count to cpu_num (cpu_num is defined outside this excerpt).
mkl.set_num_threads(cpu_num)


# Greens Function
def GF(x):
    """
    Green's function value at x, assembled from yn(1, x), struve(-3, x)
    and struve(-2, x) as (x * (Y1 - S3) - 4 * S2) / x**2.
    """
    struve_m2 = struve(-2, x)
    struve_m3 = struve(-3, x)
    bessel_y1 = yn(1, x)
    numerator = x * (bessel_y1 - struve_m3) - 4 * struve_m2
    return numerator / x**2


# FunctionGenerator representation of GF (fast Green's function)
gf = FunctionGenerator(
    GF,
    a=1e-30,
    b=1000,
    tol=1e-12,
)
# compilable base function extracted from the generator
_gf = gf.get_base_function()


# Kernel
@numba.njit(fastmath=True)
def Kernel(sx, sy, tx, ty):
    """
    Kernel value between a source (sx, sy) and a target (tx, ty):
    _gf evaluated at the Euclidean distance between the two points.
    """
    x_off = tx - sx
    y_off = ty - sy
    r = np.sqrt(x_off**2 + y_off**2)
    return _gf(r)


# number of source and target points
N_source = 10 * 1000
N_target = 10 * 1000

# construct some data to run FMM on