def __init__(self, cfg):
    super(OpenMPBackend, self).__init__(cfg)

    # Take the alignment requirement to be 32-bytes
    self.alignb = 32

    from pyfr.backends.openmp import (blasext, cblas, packing, provider,
                                      types)

    # Register our data types
    self.const_matrix_cls = types.OpenMPConstMatrix
    self.matrix_cls = types.OpenMPMatrix
    self.matrix_bank_cls = types.OpenMPMatrixBank
    self.matrix_rslice_cls = types.OpenMPMatrixRSlice
    self.mpi_matrix_cls = types.OpenMPMPIMatrix
    self.mpi_view_cls = types.OpenMPMPIView
    self.queue_cls = types.OpenMPQueue
    self.view_cls = types.OpenMPView

    # Template lookup
    self.lookup = DottedTemplateLookup('pyfr.backends.openmp.kernels')

    # Kernel provider classes
    kprovcls = [provider.OpenMPPointwiseKernelProvider,
                blasext.OpenMPBlasExtKernels,
                packing.OpenMPPackingKernels,
                cblas.OpenMPCBLASKernels]
    self._providers = [k(self) for k in kprovcls]

    # Pointwise kernels
    self.pointwise = self._providers[0]
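
# A minimal sketch (not PyFR code) of what an alignb = 32 requirement
# implies on the host side: carving a 32-byte aligned view out of an
# over-allocated buffer.  The helper name 'aligned_empty' is
# hypothetical and for illustration only.
import numpy as np

def aligned_empty(shape, dtype=np.float64, alignb=32):
    nbytes = int(np.prod(shape))*np.dtype(dtype).itemsize

    # Over-allocate so that an aligned offset always exists
    buf = np.empty(nbytes + alignb, dtype=np.uint8)

    # Offset to the first alignb-aligned byte of the buffer
    off = -buf.ctypes.data % alignb

    return buf[off:off + nbytes].view(dtype).reshape(shape)

# Usage: the resulting data pointer is a multiple of 32
a = aligned_empty((4, 8))
assert a.ctypes.data % 32 == 0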
def __init__(self, cfg):
    super(CUDABackend, self).__init__(cfg)

    # Get the desired CUDA device
    devid = cfg.get('backend-cuda', 'device-id', 'round-robin')
    if not re.match(r'(round-robin|local-rank|\d+)$', devid):
        raise ValueError('Invalid device-id')

    # Handle the local-rank case
    if devid == 'local-rank':
        devid = str(get_local_rank())

    # In the non round-robin case set CUDA_DEVICE to be the desired
    # CUDA device number (used by pycuda.autoinit)
    os.environ.pop('CUDA_DEVICE', None)
    if devid != 'round-robin':
        os.environ['CUDA_DEVICE'] = devid

    # Create a CUDA context
    from pycuda.autoinit import context
    import pycuda.driver as cuda

    # Take the required alignment to be 128 bytes
    self.alignb = 128

    # Some CUDA devices share L1 cache and shared memory; on these
    # devices CUDA allows us to specify a preference between L1
    # cache and shared memory.  For the sake of CUBLAS (which
    # benefits greatly from more shared memory but fails to declare
    # its preference) we set the global default to PREFER_SHARED.
    context.set_cache_config(cuda.func_cache.PREFER_SHARED)

    from pyfr.backends.cuda import (blasext, cublas, packing, provider,
                                    types)

    # Register our data types
    self.base_matrix_cls = types.CUDAMatrixBase
    self.const_matrix_cls = types.CUDAConstMatrix
    self.matrix_cls = types.CUDAMatrix
    self.matrix_bank_cls = types.CUDAMatrixBank
    self.matrix_rslice_cls = types.CUDAMatrixRSlice
    self.mpi_matrix_cls = types.CUDAMPIMatrix
    self.mpi_view_cls = types.CUDAMPIView
    self.queue_cls = types.CUDAQueue
    self.view_cls = types.CUDAView

    # Template lookup
    self.lookup = DottedTemplateLookup('pyfr.backends.cuda.kernels')

    # Instantiate the base kernel providers
    kprovs = [provider.CUDAPointwiseKernelProvider,
              blasext.CUDABlasExtKernels,
              packing.CUDAPackingKernels,
              cublas.CUDACUBLASKernels]
    self._providers = [k(self) for k in kprovs]

    # Pointwise kernels
    self.pointwise = self._providers[0]
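
# A minimal sketch (not PyFR code) of the device-id validation above:
# only 'round-robin', 'local-rank', or a bare device number are
# accepted, since the pattern is anchored at both ends.
import re

for devid in ('round-robin', 'local-rank', '3', 'gpu0', '3a'):
    ok = bool(re.match(r'(round-robin|local-rank|\d+)$', devid))
    print(f'{devid}: {"valid" if ok else "invalid"}')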
def lookup(self):
    pkg = f'pyfr.backends.{self.name}.kernels'
    dfltargs = dict(fpdtype=self.fpdtype, soasz=self.soasz,
                    csubsz=self.csubsz, math=math)

    return DottedTemplateLookup(pkg, dfltargs)
def lookup(self):
    pkg = 'pyfr.backends.{0}.kernels'.format(self.name)
    dfltargs = dict(alignb=self.alignb, fpdtype=self.fpdtype,
                    soasz=self.soasz, math=math)

    return DottedTemplateLookup(pkg, dfltargs)
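
# A hedged sketch of how such default arguments surface inside a
# kernel template.  It assumes DottedTemplateLookup forwards dfltargs
# to the Mako templates it loads, which mirrors standard Mako
# behaviour but is not verified against PyFR's implementation; the
# template text below is illustrative.
from mako.template import Template

tpl = Template('typedef ${fpdtype} fpdtype_t;  /* SoA size ${soasz} */')
print(tpl.render(fpdtype='double', soasz=4))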
def __init__(self, cfg):
    super().__init__(cfg)

    import pymic as mic

    # Get the device ID to use
    devid = cfg.get('backend-mic', 'device-id', 'local-rank')

    # Handle the local-rank case
    if devid == 'local-rank':
        devid = str(get_local_rank())

    # Get a handle to the desired device
    self.dev = mic.devices[int(devid)]

    # Default stream
    self.sdflt = self.dev.get_default_stream()

    # Take the alignment requirement to be 64-bytes
    self.alignb = 64

    from pyfr.backends.mic import (blasext, cblas, packing, provider,
                                   types)

    # Register our data types
    self.base_matrix_cls = types.MICMatrixBase
    self.const_matrix_cls = types.MICConstMatrix
    self.matrix_cls = types.MICMatrix
    self.matrix_bank_cls = types.MICMatrixBank
    self.matrix_rslice_cls = types.MICMatrixRSlice
    self.queue_cls = types.MICQueue
    self.view_cls = types.MICView
    self.xchg_matrix_cls = types.MICXchgMatrix
    self.xchg_view_cls = types.MICXchgView

    # Template lookup
    self.lookup = DottedTemplateLookup('pyfr.backends.mic.kernels',
                                       fpdtype=self.fpdtype,
                                       alignb=self.alignb)

    # Kernel provider classes
    kprovcls = [provider.MICPointwiseKernelProvider,
                blasext.MICBlasExtKernels,
                packing.MICPackingKernels,
                cblas.MICCBLASKernels]
    self._providers = [k(self) for k in kprovcls]

    # Pointwise kernels
    self.pointwise = self._providers[0]
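
# A hedged sketch of what the get_local_rank() helper used by these
# backends typically computes: the rank of this process among the
# processes sharing its host, so each rank can claim a distinct
# device.  Assumes mpi4py; PyFR's actual helper may be implemented
# differently.
from mpi4py import MPI

def get_local_rank():
    # Split the world communicator by shared-memory domain (i.e. host)
    local = MPI.COMM_WORLD.Split_type(MPI.COMM_TYPE_SHARED)

    return local.rank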
def __init__(self, cfg):
    super(OpenCLBackend, self).__init__(cfg)

    import pyopencl as cl

    # Get the platform/device info from the config file
    platid = cfg.get('backend-opencl', 'platform-id', '0').lower()
    devid = cfg.get('backend-opencl', 'device-id', 'local-rank').lower()
    devtype = cfg.get('backend-opencl', 'device-type', 'all').upper()

    # Handle the local-rank case
    if devid == 'local-rank':
        devid = str(get_local_rank())

    # Map the device type to the corresponding PyOpenCL constant
    devtype = getattr(cl.device_type, devtype)

    # Determine the OpenCL platform to use
    for i, platform in enumerate(cl.get_platforms()):
        if platid == str(i) or platid == platform.name.lower():
            break
    else:
        raise ValueError('No suitable OpenCL platform found')

    # Determine the OpenCL device to use
    for i, device in enumerate(platform.get_devices(devtype)):
        if devid == str(i) or devid == device.name.lower():
            break
    else:
        raise ValueError('No suitable OpenCL device found')

    # Create an OpenCL context on this device
    self.ctx = cl.Context([device])

    # Create a queue for initialisation-type operations
    self.qdflt = cl.CommandQueue(self.ctx)

    # Compute the alignment requirement for the context
    self.alignb = device.mem_base_addr_align // 8

    from pyfr.backends.opencl import (blasext, clblas, packing, provider,
                                      types)

    # Register our data types
    self.base_matrix_cls = types.OpenCLMatrixBase
    self.const_matrix_cls = types.OpenCLConstMatrix
    self.matrix_cls = types.OpenCLMatrix
    self.matrix_bank_cls = types.OpenCLMatrixBank
    self.matrix_rslice_cls = types.OpenCLMatrixRSlice
    self.mpi_matrix_cls = types.OpenCLMPIMatrix
    self.mpi_view_cls = types.OpenCLMPIView
    self.queue_cls = types.OpenCLQueue
    self.view_cls = types.OpenCLView

    # Template lookup
    self.lookup = DottedTemplateLookup('pyfr.backends.opencl.kernels')

    # Instantiate the base kernel providers
    kprovs = [provider.OpenCLPointwiseKernelProvider,
              blasext.OpenCLBlasExtKernels,
              packing.OpenCLPackingKernels,
              clblas.OpenCLClBLASKernels]
    self._providers = [k(self) for k in kprovs]

    # Pointwise kernels
    self.pointwise = self._providers[0]
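
# A minimal sketch (not PyFR code) of the for-else selection pattern
# used twice above: accept either a positional index or a
# case-insensitive name, with the else clause firing only if the loop
# never breaks.  The platform names here are illustrative.
platforms = ['NVIDIA CUDA', 'Portable Computing Language']
platid = 'portable computing language'

for i, name in enumerate(platforms):
    if platid == str(i) or platid == name.lower():
        break
else:
    raise ValueError('No suitable OpenCL platform found')

print(f'Selected platform {i}: {name}')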