Example #1
    def build(self, source):
        if self.options.cuda_nvcc_opts:
            import shlex
            options = shlex.split(self.options.cuda_nvcc_opts)
        else:
            options = []

        if not self.options.cuda_fermi_highprec and self._device.compute_capability()[0] >= 2:
            options.append('--prec-div=false')
            options.append('--prec-sqrt=false')

        if self.options.cuda_disable_l1:
            options.extend(['-Xptxas', '-dlcm=cg'])

        if cuda.get_driver_version() >= 5000:
            # Generate annotated PTX code.
            options.append('-src-in-ptx')

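        # Note: for pycuda.compiler.SourceModule, cache_dir=None selects
        # PyCUDA's default per-user cache directory, while cache_dir=False
        # disables the compiler cache entirely.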
        if self.options.cuda_cache:
            cache = None
        else:
            cache = False

        return pycuda.compiler.SourceModule(source, options=options,
                nvcc=self.options.cuda_nvcc, keep=self.options.cuda_keep_temp,
                cache_dir=cache) #options=['-Xopencc', '-O0']) #, options=['--use_fast_math'])
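A module built this way is used by looking up kernels by name; a minimal hedged sketch (the kernel name "scale" and its signature are illustrative, not part of the snippet above):

import numpy as np
import pycuda.driver as drv

# mod = self.build(source)
scale = mod.get_function("scale")  # hypothetical kernel name
data = np.arange(256, dtype=np.float32)
# drv.InOut copies the array to the device and back after the launch
scale(drv.InOut(data), np.float32(2.0), block=(256, 1, 1), grid=(1, 1))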
Example #2
def get_pycuda_info():
    init_all_devices()
    return {"version"               : pycuda.VERSION,
            "version.text"          : pycuda.VERSION_TEXT,
            "version.status"        : pycuda.VERSION_STATUS,
            "driver.version"        : driver.get_version(),
            "driver.driver_version" : driver.get_driver_version()}
Example #3
    def build(self, source):
        if self.options.cuda_nvcc_opts:
            import shlex
            options = shlex.split(self.options.cuda_nvcc_opts)
        else:
            options = []

        if not self.options.cuda_fermi_highprec and self._device.compute_capability(
        )[0] >= 2:
            options.append('--prec-div=false')
            options.append('--prec-sqrt=false')

        if self.options.cuda_disable_l1:
            options.extend(['-Xptxas', '-dlcm=cg'])

        if cuda.get_driver_version() >= 5000:
            # Generate annotated PTX code.
            options.append('-src-in-ptx')

        if self.options.cuda_cache:
            cache = None
        else:
            cache = False

        return pycuda.compiler.SourceModule(
            source,
            options=options,
            nvcc=self.options.cuda_nvcc,
            keep=self.options.cuda_keep_temp,
            cache_dir=cache
        )  #options=['-Xopencc', '-O0']) #, options=['--use_fast_math'])
Example #4
def init_all_devices():
    global DEVICES
    if DEVICES is not None:
        return DEVICES
    log.info("CUDA initialization (this may take a few seconds)")
    driver.init()
    DEVICES = []
    log("CUDA driver version=%s", driver.get_driver_version())
    ngpus = driver.Device.count()
    log.info("CUDA %s / PyCUDA %s, found %s device(s):",
             ".".join([str(x) for x in driver.get_version()]),
             pycuda.VERSION_TEXT, ngpus)
    da = driver.device_attribute
    cf = driver.ctx_flags
    for i in range(ngpus):
        device = None
        context = None
        try:
            device = driver.Device(i)
            log(" + testing device %s: %s", i, device_info(device))
            host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
            if not host_mem:
                log.warn("skipping device %s (cannot map host memory)",
                         device_info(device))
                continue
            context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
            log("   created context=%s", context)
            log("   api version=%s", context.get_api_version())
            free, total = driver.mem_get_info()
            log("   memory: free=%sMB, total=%sMB", int(free / 1024 / 1024),
                int(total / 1024 / 1024))
            log("   multi-processors: %s, clock rate: %s",
                device.get_attribute(da.MULTIPROCESSOR_COUNT),
                device.get_attribute(da.CLOCK_RATE))
            log("   max block sizes: (%s, %s, %s)",
                device.get_attribute(da.MAX_BLOCK_DIM_X),
                device.get_attribute(da.MAX_BLOCK_DIM_Y),
                device.get_attribute(da.MAX_BLOCK_DIM_Z))
            log("   max grid sizes: (%s, %s, %s)",
                device.get_attribute(da.MAX_GRID_DIM_X),
                device.get_attribute(da.MAX_GRID_DIM_Y),
                device.get_attribute(da.MAX_GRID_DIM_Z))
            max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
            max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
            log("   maximum texture size: %sx%s", max_width, max_height)
            log("   max pitch: %s", device.get_attribute(da.MAX_PITCH))
            SMmajor, SMminor = device.compute_capability()
            compute = (SMmajor << 4) + SMminor
            log("   compute capability: %#x (%s.%s)", compute, SMmajor,
                SMminor)
            try:
                DEVICES.append(i)
                log.info("  + %s (memory: %s%% free, compute: %s.%s)",
                         device_info(device), 100 * free / total, SMmajor,
                         SMminor)
            finally:
                context.pop()
        except Exception as e:
            log.error("error on device %s: %s", (device or i), e)
    return DEVICES
Example #5
def get_info():
    return {
        "version": pycuda.VERSION,
        "version.text": pycuda.VERSION_TEXT,
        "version.status": pycuda.VERSION_STATUS,
        "driver.version": driver.get_version(),
        "driver.driver_version": driver.get_driver_version()
    }
Example #6
def get_cuda_info():
    init_all_devices()
    return {
            "driver"    : {
                           "version"        : driver.get_version(),
                           "driver_version" : driver.get_driver_version(),
                           }
            }
Example #7
def get_cuda_info():
    init_all_devices()
    return {
        "driver": {
            "version": driver.get_version(),
            "driver_version": driver.get_driver_version(),
        }
    }
Example #8
def get_info():
    return {
        "version": pycuda.VERSION,
        "version.text": pycuda.VERSION_TEXT,
        "version.status": pycuda.VERSION_STATUS,
        "driver.version": driver.get_version(),
        "driver.driver_version": driver.get_driver_version(),
    }
Example #9
def get_pycuda_info():
    init_all_devices()
    return {
        "version": pycuda.VERSION,
        "version.text": pycuda.VERSION_TEXT,
        "version.status": pycuda.VERSION_STATUS,
        "driver.version": driver.get_version(),
        "driver.driver_version": driver.get_driver_version()
    }
Example #10
def init_all_devices():
    global DEVICES, DEVICE_INFO
    if DEVICES is not None:
        return DEVICES
    log.info("CUDA initialization (this may take a few seconds)")
    driver.init()
    DEVICES = []
    DEVICE_INFO = {}
    log("CUDA driver version=%s", driver.get_driver_version())
    ngpus = driver.Device.count()
    if ngpus==0:
        log.info("CUDA %s / PyCUDA %s, no devices found", ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT)
        return DEVICES
    da = driver.device_attribute
    cf = driver.ctx_flags
    for i in range(ngpus):
        device = None
        context = None
        devinfo = "gpu %i" % i
        try:
            device = driver.Device(i)
            devinfo = device_info(device)
            log(" + testing device %s: %s", i, devinfo)
            DEVICE_INFO[i] = devinfo
            host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
            if not host_mem:
                log.warn("skipping device %s (cannot map host memory)", devinfo)
                continue
            context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
            try:
                log("   created context=%s", context)
                log("   api version=%s", context.get_api_version())
                free, total = driver.mem_get_info()
                log("   memory: free=%sMB, total=%sMB",  int(free/1024/1024), int(total/1024/1024))
                log("   multi-processors: %s, clock rate: %s", device.get_attribute(da.MULTIPROCESSOR_COUNT), device.get_attribute(da.CLOCK_RATE))
                log("   max block sizes: (%s, %s, %s)", device.get_attribute(da.MAX_BLOCK_DIM_X), device.get_attribute(da.MAX_BLOCK_DIM_Y), device.get_attribute(da.MAX_BLOCK_DIM_Z))
                log("   max grid sizes: (%s, %s, %s)", device.get_attribute(da.MAX_GRID_DIM_X), device.get_attribute(da.MAX_GRID_DIM_Y), device.get_attribute(da.MAX_GRID_DIM_Z))
                max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
                max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
                log("   maximum texture size: %sx%s", max_width, max_height)
                log("   max pitch: %s", device.get_attribute(da.MAX_PITCH))
                SMmajor, SMminor = device.compute_capability()
                compute = (SMmajor<<4) + SMminor
                log("   compute capability: %#x (%s.%s)", compute, SMmajor, SMminor)
                if i==0:
                    #we print the list info "header" from inside the loop
                    #so that the log output is bunched up together
                    log.info("CUDA %s / PyCUDA %s, found %s device%s:",
                             ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT, ngpus, engs(ngpus))
                DEVICES.append(i)
                log.info("  + %s (memory: %s%% free, compute: %s.%s)", device_info(device), 100*free/total, SMmajor, SMminor)
            finally:
                context.pop()
        except Exception as e:
            log.error("error on device %s: %s", devinfo, e)
    return DEVICES
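Since this variant returns the list of usable device indices, callers can iterate it directly; a small usage sketch, assuming the module-level DEVICE_INFO populated above:

for i in init_all_devices():
    print("device %i: %s" % (i, DEVICE_INFO.get(i, "unknown")))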
Example #11
def init_all_devices():
    global DEVICES
    if DEVICES is not None:
        return DEVICES
    log.info("CUDA initialization (this may take a few seconds)")
    driver.init()
    DEVICES = []
    log("CUDA driver version=%s", driver.get_driver_version())
    log.info("PyCUDA version=%s", pycuda.VERSION_TEXT)
    ngpus = driver.Device.count()
    log.info("CUDA version=%s found %s device(s):", ".".join([str(x) for x in driver.get_version()]), ngpus)
    da = driver.device_attribute
    cf = driver.ctx_flags
    for i in range(ngpus):
        device = None
        context = None
        try:
            device = driver.Device(i)
            log(" + testing device %s: %s", i, device_info(device))
            host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
            if not host_mem:
                log.warn("skipping device %s (cannot map host memory)", device_info(device))
                continue
            context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
            log("   created context=%s", context)
            log("   api version=%s", context.get_api_version())
            free, total = driver.mem_get_info()
            log("   memory: free=%sMB, total=%sMB",  int(free/1024/1024), int(total/1024/1024))
            log("   multi-processors: %s, clock rate: %s", device.get_attribute(da.MULTIPROCESSOR_COUNT), device.get_attribute(da.CLOCK_RATE))
            log("   max block sizes: (%s, %s, %s)", device.get_attribute(da.MAX_BLOCK_DIM_X), device.get_attribute(da.MAX_BLOCK_DIM_Y), device.get_attribute(da.MAX_BLOCK_DIM_Z))
            log("   max grid sizes: (%s, %s, %s)", device.get_attribute(da.MAX_GRID_DIM_X), device.get_attribute(da.MAX_GRID_DIM_Y), device.get_attribute(da.MAX_GRID_DIM_Z))
            max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
            max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
            log("   maximum texture size: %sx%s", max_width, max_height)
            log("   max pitch: %s", device.get_attribute(da.MAX_PITCH))
            SMmajor, SMminor = device.compute_capability()
            compute = (SMmajor<<4) + SMminor
            log("   compute capability: %#x (%s.%s)", compute, SMmajor, SMminor)
            try:
                DEVICES.append(i)
                log.info(" + %s (memory %s%% free, compute %#x)", device_info(device), 100*free/total, compute)
            finally:
                context.pop()
        except Exception as e:
            log.error("error on device %s: %s", (device or i), e)
    return DEVICES
Example #12
def gpu_info():
    """Show GPU information
    """
    print("CUDA Version: " + format_tuple(cuda.get_version()))
    print("CUDA Driver Version: " + str(cuda.get_driver_version()))
    print("Number of CUDA devices: " + str(cuda.Device.count()))
    for i in range(cuda.Device.count()):
        dev = cuda.Device(i)
        print("Device number " + str(i))
        print("  Name of CUDA device: " + str(dev.name()))
        print("  Compute capability: " +
              format_tuple(dev.compute_capability()))
        print("  Total Memory: " +
              str(dev.total_memory() / (1024.0**2)) + " MB")
        print("  Maximum number of threads per block: " +
              str(dev.max_threads_per_block))
        print("  PCI Bus ID: " + str(dev.pci_bus_id()))
        for (k, v) in dev.get_attributes().items():
            print("  " + str(k) + ": " + str(v))
Example #13
def driver_init():
    global driver_init_done
    if driver_init_done is None:
        log.info("CUDA initialization (this may take a few seconds)")
        try:
            driver.init()
            log("CUDA driver version=%s", driver.get_driver_version())
            ngpus = driver.Device.count()
            if ngpus == 0:
                log.info("CUDA %s / PyCUDA %s, no devices found",
                         ".".join(str(x) for x in driver.get_version()), pycuda.VERSION_TEXT)
            driver_init_done = True
        except Exception as e:
            log.error("Error: cannot initialize CUDA")
            log.error(" %s", e)
            driver_init_done = False
    return driver_init_done
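Callers can gate further device queries on the boolean this returns; a minimal sketch, assuming the same driver module and log used above:

if driver_init():
    for i in range(driver.Device.count()):
        log(" device %i: %s", i, driver.Device(i).name())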
Example #14
    def init_gl(self, width, height):
        super(DenseDemo, self).init_gl(width, height)

        import pycuda.gl.autoinit
        print("CUDA version: %s" % str(drv.get_version()))
        print("CUDA driver version: %s" % drv.get_driver_version())
        print("CUDA device: %s" % pycuda.gl.autoinit.device.name())
        print("\tCompute capability: %s" % str(pycuda.gl.autoinit.device.compute_capability()))
        print("\tTotal memory: %s" % pycuda.gl.autoinit.device.total_memory())

        self.ffusion = FreenectFusion(kc.K_ir, kc.K_rgb, kc.T, side=128)
        self.bbox = self.ffusion.get_bounding_box()
        #freenect.sync_set_led(2)

        # Create a texture.
        self.gl_rgb_texture = gl.glGenTextures(1)
        gl.glBindTexture(gl.GL_TEXTURE_2D, self.gl_rgb_texture)
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
Example #15
    def init_gl(self, width, height):
        super(DenseDemo, self).init_gl(width, height)

        import pycuda.gl.autoinit
        print "CUDA version: %s" % str(drv.get_version())
        print "CUDA driver version: %s" % drv.get_driver_version()
        print "CUDA device: %s" % pycuda.gl.autoinit.device.name()
        print "\tCompute capability: %s" % str(
            pycuda.gl.autoinit.device.compute_capability())
        print "\tTotal memory: %s" % pycuda.gl.autoinit.device.total_memory()

        self.ffusion = FreenectFusion(kc.K_ir, kc.K_rgb, kc.T, side=128)
        self.bbox = self.ffusion.get_bounding_box()
        #freenect.sync_set_led(2)

        # Create a texture.
        self.gl_rgb_texture = gl.glGenTextures(1)
        gl.glBindTexture(gl.GL_TEXTURE_2D, self.gl_rgb_texture)
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER,
                           gl.GL_LINEAR)
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER,
                           gl.GL_LINEAR)
Example #16
    def __init__(self, blocking=False, use_cache=True):
        self.blocking = blocking
        self.use_cache = use_cache
        self.logger = logging.getLogger(__name__)
        self.kernels = {}
        
        self.module_path = os.path.dirname(os.path.realpath(__file__))
        
        #Initialize cuda (must be first call to PyCUDA)
        cuda.init(flags=0)
        
        self.logger.info("PyCUDA version %s", str(pycuda.VERSION_TEXT))
        
        #Print some info about CUDA
        self.logger.info("CUDA version %s", str(cuda.get_version()))
        self.logger.info("Driver version %s",  str(cuda.get_driver_version()))

        self.cuda_device = cuda.Device(0)
        self.logger.info("Using '%s' GPU", self.cuda_device.name())
        self.logger.debug(" => compute capability: %s", str(self.cuda_device.compute_capability()))

        # Create the CUDA context
        if (self.blocking):
            self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_BLOCKING_SYNC)
            self.logger.warning("Using blocking context")
        else:
            self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_AUTO)
            
        free, total = cuda.mem_get_info()
        self.logger.debug(" => memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024)))
        
        self.logger.info("Created context handle <%s>", str(self.cuda_context.handle))
        
        #Create cache dir for cubin files
        if (self.use_cache):
            self.cache_path = os.path.join(self.module_path, "cuda_cache") 
            if not os.path.isdir(self.cache_path):
                os.mkdir(self.cache_path)
            self.logger.info("Using CUDA cache dir %s", self.cache_path)
Example #17
    def __init__(self, blocking=False, use_cache=True):
        self.blocking = blocking
        self.use_cache = use_cache
        self.logger = logging.getLogger(__name__)
        self.kernels = {}
        
        self.module_path = os.path.dirname(os.path.realpath(__file__))
        
        #Initialize cuda (must be first call to PyCUDA)
        cuda.init(flags=0)
        
        self.logger.info("PyCUDA version %s", str(pycuda.VERSION_TEXT))
        
        #Print some info about CUDA
        self.logger.info("CUDA version %s", str(cuda.get_version()))
        self.logger.info("Driver version %s",  str(cuda.get_driver_version()))

        self.cuda_device = cuda.Device(0)
        self.logger.info("Using '%s' GPU", self.cuda_device.name())
        self.logger.debug(" => compute capability: %s", str(self.cuda_device.compute_capability()))
        self.logger.debug(" => memory: %d MB", self.cuda_device.total_memory() / (1024*1024))

        # Create the CUDA context
        if (self.blocking):
            self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_BLOCKING_SYNC)
            self.logger.warning("Using blocking context")
        else:
            self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_AUTO)
        
        self.logger.info("Created context handle <%s>", str(self.cuda_context.handle))

        #Create cache dir for cubin files
        if (self.use_cache):
            self.cache_path = os.path.join(self.module_path, "cuda_cache") 
            if not os.path.isdir(self.cache_path):
                os.mkdir(self.cache_path)
            self.logger.debug("Using CUDA cache dir %s", self.cache_path)
Example #18
    def __init__(self,
                 device=None,
                 context_flags=None,
                 use_cache=True,
                 autotuning=True):
        """
        Create a new CUDA context
        Set device to an id or pci_bus_id to select a specific GPU
        Set context_flags to cuda.ctx_flags.SCHED_BLOCKING_SYNC for a blocking context
        """
        self.use_cache = use_cache
        self.logger = logging.getLogger(__name__)
        self.modules = {}

        self.module_path = os.path.dirname(os.path.realpath(__file__))

        #Initialize cuda (must be first call to PyCUDA)
        cuda.init(flags=0)

        self.logger.info("PyCUDA version %s", str(pycuda.VERSION_TEXT))

        #Print some info about CUDA
        self.logger.info("CUDA version %s", str(cuda.get_version()))
        self.logger.info("Driver version %s", str(cuda.get_driver_version()))

        if device is None:
            device = 0

        self.cuda_device = cuda.Device(device)
        self.logger.info("Using device %d/%d '%s' (%s) GPU", device,
                         cuda.Device.count(), self.cuda_device.name(),
                         self.cuda_device.pci_bus_id())
        self.logger.debug(" => compute capability: %s",
                          str(self.cuda_device.compute_capability()))

        # Create the CUDA context
        if context_flags is None:
            context_flags = cuda.ctx_flags.SCHED_AUTO

        self.cuda_context = self.cuda_device.make_context(flags=context_flags)

        free, total = cuda.mem_get_info()
        self.logger.debug(" => memory: %d / %d MB available",
                          int(free / (1024 * 1024)),
                          int(total / (1024 * 1024)))

        self.logger.info("Created context handle <%s>",
                         str(self.cuda_context.handle))

        #Create cache dir for cubin files
        self.cache_path = os.path.join(self.module_path, "cuda_cache")
        if (self.use_cache):
            if not os.path.isdir(self.cache_path):
                os.mkdir(self.cache_path)
            self.logger.info("Using CUDA cache dir %s", self.cache_path)

        self.autotuner = None
        if (autotuning):
            self.logger.info(
                "Autotuning enabled. It may take several minutes to run the code the first time: have patience"
            )
            self.autotuner = Autotuner.Autotuner()
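A context created with make_context stays current on the calling thread and should be popped and dereferenced on shutdown; a minimal cleanup sketch (the method name release is an assumption):

    def release(self):
        # Pop the context off this thread's context stack, then drop
        # our reference so the driver can free it.
        self.cuda_context.pop()
        self.cuda_context.detach()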
Example #19
def init_all_devices():
    global DEVICES, DEVICE_INFO
    if DEVICES is not None:
        return DEVICES
    log.info("CUDA initialization (this may take a few seconds)")
    DEVICES = []
    DEVICE_INFO = {}
    try:
        driver.init()
    except Exception as e:
        log.error("Error: cannot initialize CUDA")
        log.error(" %s", e)
        return DEVICES
    log("CUDA driver version=%s", driver.get_driver_version())
    ngpus = driver.Device.count()
    if ngpus == 0:
        log.info("CUDA %s / PyCUDA %s, no devices found",
                 ".".join([str(x) for x in driver.get_version()]),
                 pycuda.VERSION_TEXT)
        return DEVICES
    cuda_device_blacklist = get_pref("blacklist")
    da = driver.device_attribute
    cf = driver.ctx_flags
    for i in range(ngpus):
        device = None
        context = None
        devinfo = "gpu %i" % i
        try:
            device = driver.Device(i)
            devinfo = device_info(device)
            if cuda_device_blacklist:
                blacklisted = [
                    x for x in cuda_device_blacklist
                    if x and devinfo.find(x) >= 0
                ]
                log("blacklisted(%s / %s)=%s", devinfo, cuda_device_blacklist,
                    blacklisted)
                if blacklisted:
                    log.warn(
                        "Warning: device '%s' is blacklisted and will not be used",
                        devinfo)
                    continue
            log(" + testing device %s: %s", i, devinfo)
            DEVICE_INFO[i] = devinfo
            host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
            if not host_mem:
                log.warn("skipping device %s (cannot map host memory)",
                         devinfo)
                continue
            context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
            try:
                log("   created context=%s", context)
                log("   api version=%s", context.get_api_version())
                free, total = driver.mem_get_info()
                log("   memory: free=%sMB, total=%sMB",
                    int(free / 1024 / 1024), int(total / 1024 / 1024))
                log("   multi-processors: %s, clock rate: %s",
                    device.get_attribute(da.MULTIPROCESSOR_COUNT),
                    device.get_attribute(da.CLOCK_RATE))
                log("   max block sizes: (%s, %s, %s)",
                    device.get_attribute(da.MAX_BLOCK_DIM_X),
                    device.get_attribute(da.MAX_BLOCK_DIM_Y),
                    device.get_attribute(da.MAX_BLOCK_DIM_Z))
                log("   max grid sizes: (%s, %s, %s)",
                    device.get_attribute(da.MAX_GRID_DIM_X),
                    device.get_attribute(da.MAX_GRID_DIM_Y),
                    device.get_attribute(da.MAX_GRID_DIM_Z))
                max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
                max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
                log("   maximum texture size: %sx%s", max_width, max_height)
                log("   max pitch: %s", device.get_attribute(da.MAX_PITCH))
                SMmajor, SMminor = device.compute_capability()
                compute = (SMmajor << 4) + SMminor
                log("   compute capability: %#x (%s.%s)", compute, SMmajor,
                    SMminor)
                if i == 0:
                    #we print the list info "header" from inside the loop
                    #so that the log output is bunched up together
                    log.info("CUDA %s / PyCUDA %s, found %s device%s:",
                             ".".join([str(x) for x in driver.get_version()]),
                             pycuda.VERSION_TEXT, ngpus, engs(ngpus))
                if SMmajor >= 2:
                    DEVICES.append(i)
                else:
                    log.info("  this device is too old!")
                log.info("  + %s (memory: %s%% free, compute: %s.%s)",
                         device_info(device), 100 * free / total, SMmajor,
                         SMminor)
            finally:
                context.pop()
        except Exception as e:
            log.error("error on device %s: %s", devinfo, e)
    return DEVICES
Example #20
File: dnorm.py Project: CrossD/Stuff
#
# Example based on dnorm from RCUDA
# Timing code from http://wiki.tiker.net/PyCuda/Examples/SimpleSpeedTest
#

import pycuda.autoinit
import pycuda.driver as drv
import numpy as np
import scipy as sp
from scipy.stats import norm
from pycuda.compiler import SourceModule

# Versions:
print(drv.get_version())
print(drv.get_driver_version())

m = SourceModule("""
#include <stdio.h>
__global__ void dnorm_kernel(float *vals, float *x, int N, float mu, float sigma, int dbg)
{
    int myblock = blockIdx.x;   // 1D-grid
    int blocksize = blockDim.x; // 1D-block
    int subthread = threadIdx.x;
    int idx = myblock * blocksize + subthread;
    if (idx < N) {
        if (dbg){
            printf("thread idx: %04d\\t x[%d] = %f\\t (n=%d,mu=%f,sigma=%f)\\n",idx,idx,x[idx],N,mu,sigma);
        }
        float std = (x[idx] - mu)/sigma;
        float e = exp( - 0.5 * std * std);
        vals[idx] = e / ( sigma * sqrt(2 * 3.141592653589793));
    }
}
""")
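The snippet is cut off before the kernel is launched; a hedged launch sketch consistent with the signature above (block and grid sizes are illustrative):

N = 1024
x = np.random.randn(N).astype(np.float32)
vals = np.zeros_like(x)
dnorm = m.get_function("dnorm_kernel")
threads = 256
dnorm(drv.Out(vals), drv.In(x), np.int32(N),
      np.float32(0.0), np.float32(1.0), np.int32(0),
      block=(threads, 1, 1), grid=((N + threads - 1) // threads, 1))
# cross-check against scipy on the host
assert np.allclose(vals, norm.pdf(x), atol=1e-5)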
Example #21
def log_sys_info():
    log.info("PyCUDA version=%s", ".".join([str(x) for x in driver.get_version()]))
    log.info("PyCUDA driver version=%s", driver.get_driver_version())
print('testing PyCUDA...')

import pycuda
import pycuda.driver as cuda
import pycuda.autoinit

print('PyCUDA version:      ' + str(pycuda.VERSION_TEXT))
print('CUDA build version:  ' + str(cuda.get_version()))
print('CUDA driver version: ' + str(cuda.get_driver_version()))

dev = cuda.Device(0)

print('CUDA device name:    ' + str(dev.name()))
print('CUDA device memory:  ' + str(int(dev.total_memory()/1048576)) + ' MB')
print('CUDA device compute: ' + str(dev.compute_capability()))

print('PyCUDA OK\n')
Example #23
def log_sys_info():
    log.info("PyCUDA version=%s",
             ".".join([str(x) for x in driver.get_version()]))
    log.info("PyCUDA driver version=%s", driver.get_driver_version())