示例#1
0
文件: base.py 项目: Aerojspark/PyFR
    def __init__(self, cfg):
        """Initialise the OpenMP backend.

        Hands *cfg* to the parent initialiser, fixes the alignment at
        32 bytes, registers the OpenMP data type classes, creates the
        kernel template lookup and instantiates the kernel providers.
        """
        super(OpenMPBackend, self).__init__(cfg)

        # Alignment requirement, in bytes
        self.alignb = 32

        from pyfr.backends.openmp import (blasext, cblas, packing, provider,
                                          types)

        # Data type classes, paired with the attribute each is stored under
        dtype_classes = (
            ('const_matrix_cls', types.OpenMPConstMatrix),
            ('matrix_cls', types.OpenMPMatrix),
            ('matrix_bank_cls', types.OpenMPMatrixBank),
            ('matrix_rslice_cls', types.OpenMPMatrixRSlice),
            ('mpi_matrix_cls', types.OpenMPMPIMatrix),
            ('mpi_view_cls', types.OpenMPMPIView),
            ('queue_cls', types.OpenMPQueue),
            ('view_cls', types.OpenMPView),
        )
        for attr, cls in dtype_classes:
            setattr(self, attr, cls)

        # Template lookup for the OpenMP kernels package
        self.lookup = DottedTemplateLookup('pyfr.backends.openmp.kernels')

        # Instantiate each kernel provider, handing it this backend
        providers = []
        for provcls in (provider.OpenMPPointwiseKernelProvider,
                        blasext.OpenMPBlasExtKernels,
                        packing.OpenMPPackingKernels,
                        cblas.OpenMPCBLASKernels):
            providers.append(provcls(self))
        self._providers = providers

        # The pointwise provider is the first entry; expose it directly
        self.pointwise = self._providers[0]
示例#2
0
文件: base.py 项目: fbob/PyFR
    def __init__(self, cfg):
        """Initialise the CUDA backend.

        Reads ``device-id`` from the ``backend-cuda`` section of *cfg*
        (default ``round-robin``), exports it through the ``CUDA_DEVICE``
        environment variable where applicable, creates a CUDA context via
        ``pycuda.autoinit`` and then registers the CUDA data types,
        template lookup and kernel providers.

        Raises:
            ValueError: if ``device-id`` is not ``round-robin``,
                ``local-rank`` or a string of digits.
        """
        super(CUDABackend, self).__init__(cfg)

        # Fetch and validate the requested CUDA device
        devid = cfg.get('backend-cuda', 'device-id', 'round-robin')
        if re.match(r'(round-robin|local-rank|\d+)$', devid) is None:
            raise ValueError('Invalid device-id')

        # A local-rank request resolves to this process's local rank
        if devid == 'local-rank':
            devid = str(get_local_rank())

        # CUDA_DEVICE (used by pycuda.autoinit) must be absent for
        # round-robin and hold the device number otherwise
        if devid == 'round-robin':
            os.environ.pop('CUDA_DEVICE', None)
        else:
            os.environ['CUDA_DEVICE'] = devid

        # Importing pycuda.autoinit creates the CUDA context, so this
        # has to come after the environment has been prepared
        from pycuda.autoinit import context
        import pycuda.driver as cuda

        # Required alignment, in bytes
        self.alignb = 128

        # On devices where L1 cache and shared memory share storage CUDA
        # lets us state a preference between the two.  CUBLAS benefits
        # greatly from more shared memory but does not declare a
        # preference itself, hence the global default is PREFER_SHARED.
        context.set_cache_config(cuda.func_cache.PREFER_SHARED)

        from pyfr.backends.cuda import (blasext, cublas, packing, provider,
                                        types)

        # Data type classes, paired with the attribute each is stored under
        dtype_classes = (
            ('base_matrix_cls', types.CUDAMatrixBase),
            ('const_matrix_cls', types.CUDAConstMatrix),
            ('matrix_cls', types.CUDAMatrix),
            ('matrix_bank_cls', types.CUDAMatrixBank),
            ('matrix_rslice_cls', types.CUDAMatrixRSlice),
            ('mpi_matrix_cls', types.CUDAMPIMatrix),
            ('mpi_view_cls', types.CUDAMPIView),
            ('queue_cls', types.CUDAQueue),
            ('view_cls', types.CUDAView),
        )
        for attr, cls in dtype_classes:
            setattr(self, attr, cls)

        # Template lookup for the CUDA kernels package
        self.lookup = DottedTemplateLookup('pyfr.backends.cuda.kernels')

        # Instantiate each base kernel provider with this backend
        providers = []
        for provcls in (provider.CUDAPointwiseKernelProvider,
                        blasext.CUDABlasExtKernels,
                        packing.CUDAPackingKernels,
                        cublas.CUDACUBLASKernels):
            providers.append(provcls(self))
        self._providers = providers

        # The pointwise provider is the first entry; expose it directly
        self.pointwise = self._providers[0]
示例#3
0
    def lookup(self):
        """Return a template lookup for this backend's kernels package.

        The package name is derived from ``self.name``.  The lookup is
        also given a dictionary holding the backend's ``fpdtype``,
        ``soasz`` and ``csubsz`` values together with the ``math``
        module.
        """
        kernel_pkg = 'pyfr.backends.{0}.kernels'.format(self.name)

        # Values passed to the lookup alongside the package name
        default_args = {
            'fpdtype': self.fpdtype,
            'soasz': self.soasz,
            'csubsz': self.csubsz,
            'math': math,
        }

        return DottedTemplateLookup(kernel_pkg, default_args)
示例#4
0
    def lookup(self):
        """Return a template lookup for this backend's kernels package.

        The package name is derived from ``self.name``.  The lookup is
        also given a dictionary holding the backend's ``alignb``,
        ``fpdtype`` and ``soasz`` values together with the ``math``
        module.
        """
        kernel_pkg = 'pyfr.backends.{name}.kernels'.format(name=self.name)

        # Values passed to the lookup alongside the package name
        default_args = {
            'alignb': self.alignb,
            'fpdtype': self.fpdtype,
            'soasz': self.soasz,
            'math': math,
        }

        return DottedTemplateLookup(kernel_pkg, default_args)
示例#5
0
文件: base.py 项目: jgiret/PyFR
    def __init__(self, cfg):
        """Initialise the MIC backend.

        Reads ``device-id`` from the ``backend-mic`` section of *cfg*
        (default ``local-rank``), obtains the matching pymic device and
        its default stream, and then registers the MIC data types,
        template lookup and kernel providers.
        """
        super().__init__(cfg)

        import pymic as mic

        # Which device to use, as configured
        devid = cfg.get('backend-mic', 'device-id', 'local-rank')

        # A local-rank request resolves to this process's local rank
        if devid == 'local-rank':
            devid = str(get_local_rank())

        # Device handle, selected by integer index
        devnum = int(devid)
        self.dev = mic.devices[devnum]

        # Default stream of that device
        self.sdflt = self.dev.get_default_stream()

        # Alignment requirement, in bytes
        self.alignb = 64

        from pyfr.backends.mic import (blasext, cblas, packing, provider,
                                       types)

        # Data type classes, paired with the attribute each is stored under
        dtype_classes = (
            ('base_matrix_cls', types.MICMatrixBase),
            ('const_matrix_cls', types.MICConstMatrix),
            ('matrix_cls', types.MICMatrix),
            ('matrix_bank_cls', types.MICMatrixBank),
            ('matrix_rslice_cls', types.MICMatrixRSlice),
            ('queue_cls', types.MICQueue),
            ('view_cls', types.MICView),
            ('xchg_matrix_cls', types.MICXchgMatrix),
            ('xchg_view_cls', types.MICXchgView),
        )
        for attr, cls in dtype_classes:
            setattr(self, attr, cls)

        # Template lookup for the MIC kernels package, given the
        # backend's fpdtype and alignb as keyword arguments
        lookup_kwargs = {'fpdtype': self.fpdtype, 'alignb': self.alignb}
        self.lookup = DottedTemplateLookup('pyfr.backends.mic.kernels',
                                           **lookup_kwargs)

        # Instantiate each kernel provider, handing it this backend
        providers = []
        for provcls in (provider.MICPointwiseKernelProvider,
                        blasext.MICBlasExtKernels,
                        packing.MICPackingKernels,
                        cblas.MICCBLASKernels):
            providers.append(provcls(self))
        self._providers = providers

        # The pointwise provider is the first entry; expose it directly
        self.pointwise = self._providers[0]
示例#6
0
文件: base.py 项目: fbob/PyFR
    def __init__(self, cfg):
        """Initialise the OpenCL backend.

        Reads ``platform-id``, ``device-id`` and ``device-type`` from the
        ``backend-opencl`` section of *cfg*, selects the matching
        platform and device (each by index or by lower-cased name),
        creates a context and a default command queue on that device and
        then registers the OpenCL data types, template lookup and kernel
        providers.

        Raises:
            ValueError: if no platform or no device matches the
                configured identifiers.
        """
        super(OpenCLBackend, self).__init__(cfg)

        import pyopencl as cl

        # Platform/device selection as given in the config file
        section = 'backend-opencl'
        platid = cfg.get(section, 'platform-id', '0').lower()
        devid = cfg.get(section, 'device-id', 'local-rank').lower()
        devtype = cfg.get(section, 'device-type', 'all').upper()

        # A local-rank request resolves to this process's local rank
        if devid == 'local-rank':
            devid = str(get_local_rank())

        # Swap the device type name for the PyOpenCL constant
        devtype = getattr(cl.device_type, devtype)

        # First platform whose index or name matches platform-id
        platform = next(
            (plat for idx, plat in enumerate(cl.get_platforms())
             if platid == str(idx) or platid == plat.name.lower()),
            None
        )
        if platform is None:
            raise ValueError('No suitable OpenCL platform found')

        # First device of the requested type whose index or name matches
        device = next(
            (dev for idx, dev in enumerate(platform.get_devices(devtype))
             if devid == str(idx) or devid == dev.name.lower()),
            None
        )
        if device is None:
            raise ValueError('No suitable OpenCL device found')

        # OpenCL context bound to the chosen device
        self.ctx = cl.Context([device])

        # Queue for initialisation-type operations
        self.qdflt = cl.CommandQueue(self.ctx)

        # Alignment requirement for the context; the device value is
        # divided by eight (presumably bits to bytes -- confirm against
        # the OpenCL CL_DEVICE_MEM_BASE_ADDR_ALIGN documentation)
        self.alignb = device.mem_base_addr_align // 8

        from pyfr.backends.opencl import (blasext, clblas, packing, provider,
                                          types)

        # Data type classes, paired with the attribute each is stored under
        dtype_classes = (
            ('base_matrix_cls', types.OpenCLMatrixBase),
            ('const_matrix_cls', types.OpenCLConstMatrix),
            ('matrix_cls', types.OpenCLMatrix),
            ('matrix_bank_cls', types.OpenCLMatrixBank),
            ('matrix_rslice_cls', types.OpenCLMatrixRSlice),
            ('mpi_matrix_cls', types.OpenCLMPIMatrix),
            ('mpi_view_cls', types.OpenCLMPIView),
            ('queue_cls', types.OpenCLQueue),
            ('view_cls', types.OpenCLView),
        )
        for attr, cls in dtype_classes:
            setattr(self, attr, cls)

        # Template lookup for the OpenCL kernels package
        self.lookup = DottedTemplateLookup('pyfr.backends.opencl.kernels')

        # Instantiate each base kernel provider with this backend
        providers = []
        for provcls in (provider.OpenCLPointwiseKernelProvider,
                        blasext.OpenCLBlasExtKernels,
                        packing.OpenCLPackingKernels,
                        clblas.OpenCLClBLASKernels):
            providers.append(provcls(self))
        self._providers = providers

        # The pointwise provider is the first entry; expose it directly
        self.pointwise = self._providers[0]