def __init__(s, nxs, ny, nz, target_device='all', print_device_info=True):
    """Collect the GPU devices, split the x-axis among them and run the setup steps.

    Args:
        nxs: Either a list holding one x-size per GPU device (its length
            must equal the number of GPU devices), or an int total x-size
            that must be a multiple of the number of GPU devices.
        ny: Stored as ``s.ny``.
        nz: Stored as ``s.nz``.
        target_device: When an int, the index into the GPU device list of
            the device whose optimal global work size is stored in
            ``s.Gs``; any other value (including the default ``'all'``)
            selects device 0.
        print_device_info: When true, print the GPU and CPU information
            through ``utils``.

    On an invalid ``nxs`` an error message is printed and ``sys.exit()``
    is called.
    """
    s.gpu_devices = utils.get_gpu_devices()
    if print_device_info:
        utils.print_gpu_info(s.gpu_devices)
        utils.print_cpu_info()

    s.context, s.queues = utils.create_context_queues(s.gpu_devices)
    s.ngpu = len(s.gpu_devices)
    s.Ls = 256

    # The original compared ``target_device == int`` (value against the type
    # object), which could never select a device by index.
    if isinstance(target_device, int):
        work_size_device = s.gpu_devices[target_device]
    else:
        work_size_device = s.gpu_devices[0]
    s.Gs = utils.get_optimal_global_work_size(work_size_device)

    if isinstance(nxs, list):
        if len(nxs) != s.ngpu:
            print('Error: len(nxs) %d is not matched with the number of target devices %d.' % (len(nxs), s.ngpu))
            sys.exit()
        s.nxs = nxs
        s.nx_gpu = np.array(nxs).sum()
    elif isinstance(nxs, int):
        if nxs % s.ngpu != 0:
            print('Error: nxs %d is not multiple of the number of target devices %d.' % (nxs, s.ngpu))
            sys.exit()
        # Floor division keeps the per-device sizes integral regardless of
        # the interpreter's division semantics.
        s.nxs = [nxs // s.ngpu] * s.ngpu
        s.nx_gpu = nxs
    else:
        print('Error: nxs type %s is invalid.' % type(nxs))
        sys.exit()

    # The original ``s.ny, = ny, nz`` unpacked two values into one target,
    # which raises ValueError and never stored nz.
    s.ny, s.nz = ny, nz

    s.check_grid_size()
    s.allocations()
    s.get_program(print_source=False)
def __init__(s, nxs, ny, nz, target_device='all', print_device_info=True):
    """Gather GPU devices, distribute the x-axis over them, then run setup.

    Args:
        nxs: A list with one x-size per GPU device (length must match the
            GPU device count), or an int total x-size that is a multiple
            of the GPU device count.
        ny: Stored as ``s.ny``.
        nz: Stored as ``s.nz``.
        target_device: If an int, index of the GPU device used to obtain
            the global work size stored in ``s.Gs``; otherwise (e.g. the
            default ``'all'``) device 0 is used.
        print_device_info: If true, GPU and CPU information is printed via
            ``utils``.

    An invalid ``nxs`` prints an error message and terminates through
    ``sys.exit()``.
    """
    s.gpu_devices = utils.get_gpu_devices()
    if print_device_info:
        utils.print_gpu_info(s.gpu_devices)
        utils.print_cpu_info()

    s.context, s.queues = utils.create_context_queues(s.gpu_devices)
    s.ngpu = len(s.gpu_devices)
    s.Ls = 256

    # ``target_device == int`` in the original compared against the type
    # object itself, so an integer index was silently ignored.
    device_index = target_device if isinstance(target_device, int) else 0
    s.Gs = utils.get_optimal_global_work_size(s.gpu_devices[device_index])

    if isinstance(nxs, list):
        if len(nxs) != s.ngpu:
            print('Error: len(nxs) %d is not matched with the number of target devices %d.' % (len(nxs), s.ngpu))
            sys.exit()
        s.nxs = nxs
        s.nx_gpu = np.array(nxs).sum()
    elif isinstance(nxs, int):
        if nxs % s.ngpu != 0:
            print('Error: nxs %d is not multiple of the number of target devices %d.' % (nxs, s.ngpu))
            sys.exit()
        # Integer (floor) division so every per-device size stays an int.
        s.nxs = [nxs // s.ngpu] * s.ngpu
        s.nx_gpu = nxs
    else:
        print('Error: nxs type %s is invalid.' % type(nxs))
        sys.exit()

    # Fixes ``s.ny, = ny, nz``: a two-value tuple cannot be unpacked into a
    # single target (ValueError), and nz was never stored.
    s.ny, s.nz = ny, nz

    s.check_grid_size()
    s.allocations()
    s.get_program(print_source=False)
def __init__(s, nxs, ny, nz, target_device='all', print_verbose=True):
    """Select the target devices, split the x-axis and run the setup steps.

    Args:
        nxs: Either a list with one x-size per target (its length must
            equal ``s.nnx``) or an int total x-size.
        ny: Stored as ``s.ny``.
        nz: Stored as ``s.nz``.
        target_device: One of ``'all'``, ``'cpu'``, ``'gpu'`` or
            ``'gpuN'`` (N being a valid GPU index). Without GPU devices
            only ``'all'`` and ``'cpu'`` are accepted.
        print_verbose: When true, device information and the resulting
            configuration are printed.

    On an invalid ``target_device`` or ``nxs`` an error message is printed
    and ``sys.exit()`` is called.
    """
    s.print_verbose = print_verbose
    s.gpu_devices = utils.get_gpu_devices(s.print_verbose)
    if s.print_verbose:
        utils.print_gpu_info(s.gpu_devices)
        utils.print_cpu_info()

    ngpu_dev = len(s.gpu_devices)
    s.lsize = 256
    s.gsizes = []
    s.nnx = 1
    s.ngpu = ngpu_dev
    s.context, s.queues = utils.create_context_queues(s.gpu_devices)

    td = target_device
    if ngpu_dev > 0:
        s.gsizes = [utils.get_optimal_global_work_size(device)
                    for device in s.gpu_devices]
        # Names accepted for selecting one specific GPU: 'gpu0', 'gpu1', ...
        single_gpu_names = ['gpu%d' % i for i in range(ngpu_dev)]

        if td == 'cpu':
            s.ngpu = 0
            target_str = 'CPU'
        elif td in single_gpu_names:
            s.ngpu = 1
            gpu_num = int(td.strip('gpu'))
            s.gsizes = [s.gsizes[gpu_num]]
            s.gpu_devices = [s.gpu_devices[gpu_num]]
            # Rebuild the context and queues for the single selected device.
            s.context, s.queues = utils.create_context_queues(s.gpu_devices)
            target_str = 'Single GPU #%d' % gpu_num
        elif td == 'gpu':
            s.nnx = ngpu_dev
            target_str = '%d GPUs' % s.ngpu
        elif td == 'all':
            s.nnx = ngpu_dev + 1
            target_str = 'CPU + %d GPUs' % s.ngpu
        else:
            print('Error: Invalid target_device option.')
            print(' Possible options: %s' % (['all', 'cpu', 'gpu'] + single_gpu_names))
            sys.exit()
    else:
        if td in ['all', 'cpu']:
            s.nnx = 1
            s.ngpu = 0
            target_str = 'CPU'
        else:
            print('Error: Invalid target_device option.')
            print(' There are no GPU devices.')
            print(' Possible options: %s' % (['all', 'cpu']))
            sys.exit()

    if isinstance(nxs, list):
        if len(nxs) != s.nnx:
            print('Error: len(nxs) %d is not matched with the number of target devices %d.' % (len(nxs), s.nnx))
            sys.exit()
        s.nxs = nxs
        s.nx_total = np.array(nxs).sum()
    elif isinstance(nxs, int):
        s.nx_total = nxs
        if s.nnx == 1:
            s.nxs = [nxs]
        else:
            # TODO: the original author left a commented-out call to
            # utils.get_optimal_nxs() here; an even split is used instead.
            # NOTE(review): for target_device='all' this produces s.ngpu
            # entries while s.nnx is ngpu + 1 -- confirm the intended split.
            # Floor division keeps the sizes integral.
            s.nxs = [nxs // s.ngpu] * s.ngpu
    else:
        print('Error: nxs type %s is invalid.' % type(nxs))
        print(' Possible types: %s' % (['list', 'int']))
        sys.exit()

    if s.print_verbose:
        print('Target Device : %s' % target_str)
        print('s.nnx = %d' % s.nnx)
        print('s.ngpu = %d' % s.ngpu)
        print('s.nxs = %s' % s.nxs)
        print('')

    # The original ``s.ny, = ny, nz`` unpacked two values into one target,
    # which raises ValueError and never stored nz.
    s.ny, s.nz = ny, nz

    s.check_grid_size()
    s.allocations()
    s.get_program(print_ksource=False)
    s.prepare_updates()
def __init__(s, nxs, ny, nz, target_device='all', print_verbose=True):
    """Resolve the target devices, distribute the x-axis, then run setup.

    Args:
        nxs: A list with one x-size per target (length must equal
            ``s.nnx``), or an int total x-size.
        ny: Stored as ``s.ny``.
        nz: Stored as ``s.nz``.
        target_device: ``'all'``, ``'cpu'``, ``'gpu'`` or ``'gpuN'`` with N
            a valid GPU index. When no GPU device is present only
            ``'all'`` and ``'cpu'`` are valid.
        print_verbose: If true, print device information and the chosen
            configuration.

    An invalid ``target_device`` or ``nxs`` prints an error message and
    terminates through ``sys.exit()``.
    """
    s.print_verbose = print_verbose
    s.gpu_devices = utils.get_gpu_devices(s.print_verbose)
    if s.print_verbose:
        utils.print_gpu_info(s.gpu_devices)
        utils.print_cpu_info()

    ngpu_dev = len(s.gpu_devices)
    s.lsize = 256
    s.gsizes = []
    s.nnx = 1
    s.ngpu = ngpu_dev
    s.context, s.queues = utils.create_context_queues(s.gpu_devices)

    td = target_device
    if ngpu_dev == 0:
        # Without GPU devices only the CPU can be targeted.
        if td not in ['all', 'cpu']:
            print('Error: Invalid target_device option.')
            print(' There are no GPU devices.')
            print(' Possible options: %s' % (['all', 'cpu']))
            sys.exit()
        s.nnx = 1
        s.ngpu = 0
        target_str = 'CPU'
    else:
        s.gsizes = [utils.get_optimal_global_work_size(device)
                    for device in s.gpu_devices]
        # Accepted names for picking one specific GPU: 'gpu0', 'gpu1', ...
        per_gpu_options = ['gpu%d' % i for i in range(ngpu_dev)]

        if td == 'cpu':
            s.ngpu = 0
            target_str = 'CPU'
        elif td in per_gpu_options:
            s.ngpu = 1
            gpu_num = int(td.strip('gpu'))
            s.gsizes = [s.gsizes[gpu_num]]
            s.gpu_devices = [s.gpu_devices[gpu_num]]
            # The context and queues are recreated for the selected device only.
            s.context, s.queues = utils.create_context_queues(s.gpu_devices)
            target_str = 'Single GPU #%d' % gpu_num
        elif td == 'gpu':
            s.nnx = ngpu_dev
            target_str = '%d GPUs' % s.ngpu
        elif td == 'all':
            s.nnx = ngpu_dev + 1
            target_str = 'CPU + %d GPUs' % s.ngpu
        else:
            print('Error: Invalid target_device option.')
            print(' Possible options: %s' % (['all', 'cpu', 'gpu'] + per_gpu_options))
            sys.exit()

    if isinstance(nxs, list):
        if len(nxs) != s.nnx:
            print('Error: len(nxs) %d is not matched with the number of target devices %d.' % (len(nxs), s.nnx))
            sys.exit()
        s.nxs = nxs
        s.nx_total = np.array(nxs).sum()
    elif isinstance(nxs, int):
        s.nx_total = nxs
        if s.nnx == 1:
            s.nxs = [nxs]
        else:
            # TODO: a call to utils.get_optimal_nxs() was left commented out
            # here by the original author; an even split is used instead.
            # NOTE(review): with target_device='all' the list has s.ngpu
            # entries although s.nnx is ngpu + 1 -- confirm the intent.
            # Floor division keeps every per-device size an int.
            s.nxs = [nxs // s.ngpu] * s.ngpu
    else:
        print('Error: nxs type %s is invalid.' % type(nxs))
        print(' Possible types: %s' % (['list', 'int']))
        sys.exit()

    if s.print_verbose:
        print('Target Device : %s' % target_str)
        print('s.nnx = %d' % s.nnx)
        print('s.ngpu = %d' % s.ngpu)
        print('s.nxs = %s' % s.nxs)
        print('')

    # Fixes ``s.ny, = ny, nz``: a two-value tuple cannot be unpacked into a
    # single target (ValueError), and nz was never stored.
    s.ny, s.nz = ny, nz

    s.check_grid_size()
    s.allocations()
    s.get_program(print_ksource=False)
    s.prepare_updates()