def __init__(s, nxs, ny, nz, target_device='all', print_device_info=True):
        """Create the GPU context, split the x-axis over the GPUs and run setup.

        Args:
            nxs: x-grid size. Either a list with one size per GPU device, or
                a single int that is divided evenly among all GPU devices.
            ny: grid size stored as ``s.ny``.
            nz: grid size stored as ``s.nz``.
            target_device: an int selects the GPU device whose optimal global
                work size is stored in ``s.Gs``; any other value (default
                ``'all'``) uses the first GPU device.
            print_device_info: when true, print GPU and CPU information.

        Raises:
            SystemExit: if ``nxs`` is neither a list nor an int, if the list
                length differs from the number of GPU devices, or if the int
                is not a multiple of the number of GPU devices.
        """
        s.gpu_devices = utils.get_gpu_devices()
        if print_device_info:
            utils.print_gpu_info(s.gpu_devices)
            utils.print_cpu_info()

        s.context, s.queues = utils.create_context_queues(s.gpu_devices)
        s.ngpu = len(s.gpu_devices)
        s.Ls = 256

        # isinstance() rather than `== int`: comparing a device index with the
        # type object itself is never true, so the index would be ignored.
        if isinstance(target_device, int):
            gs_device = s.gpu_devices[target_device]
        else:
            gs_device = s.gpu_devices[0]
        s.Gs = utils.get_optimal_global_work_size(gs_device)

        if isinstance(nxs, list):
            if len(nxs) != s.ngpu:
                print(
                    'Error: len(nxs) %d is not matched with the number of target devices %d.'
                    % (len(nxs), s.ngpu))
                sys.exit()
            s.nxs = nxs
            s.nx_gpu = np.array(nxs).sum()
        elif isinstance(nxs, int):
            if nxs % s.ngpu != 0:
                print(
                    'Error: nxs %d is not multiple of the number of target devices %d.'
                    % (nxs, s.ngpu))
                sys.exit()
            # Floor division keeps the per-GPU sizes integral on Python 3 as
            # well; the divisibility check above guarantees no remainder.
            s.nxs = [nxs // s.ngpu] * s.ngpu
            s.nx_gpu = nxs
        else:
            print('Error: nxs type %s is invalid.' % type(nxs))
            sys.exit()

        s.ny, s.nz = ny, nz
        s.check_grid_size()
        s.allocations()
        s.get_program(print_source=False)
	def __init__(s, nxs, ny, nz, target_device='all', print_device_info=True):
		"""Create the GPU context, split the x-axis over the GPUs and run setup.

		Args:
			nxs: x-grid size. Either a list with one size per GPU device, or
				a single int that is divided evenly among all GPU devices.
			ny: grid size stored as ``s.ny``.
			nz: grid size stored as ``s.nz``.
			target_device: an int selects the GPU device whose optimal global
				work size is stored in ``s.Gs``; any other value (default
				``'all'``) uses the first GPU device.
			print_device_info: when true, print GPU and CPU information.

		Raises:
			SystemExit: if ``nxs`` is neither a list nor an int, if the list
				length differs from the number of GPU devices, or if the int
				is not a multiple of the number of GPU devices.
		"""
		s.gpu_devices = utils.get_gpu_devices()
		if print_device_info:
			utils.print_gpu_info(s.gpu_devices)
			utils.print_cpu_info()

		s.context, s.queues = utils.create_context_queues(s.gpu_devices)
		s.ngpu = len(s.gpu_devices)
		s.Ls = 256

		# isinstance() rather than `== int`: comparing a device index with the
		# type object itself is never true, so the index would be ignored.
		if isinstance(target_device, int):
			gs_device = s.gpu_devices[target_device]
		else:
			gs_device = s.gpu_devices[0]
		s.Gs = utils.get_optimal_global_work_size(gs_device)

		if isinstance(nxs, list):
			if len(nxs) != s.ngpu:
				print('Error: len(nxs) %d is not matched with the number of target devices %d.' %(len(nxs), s.ngpu))
				sys.exit()
			s.nxs = nxs
			s.nx_gpu = np.array(nxs).sum()
		elif isinstance(nxs, int):
			if nxs % s.ngpu != 0:
				print('Error: nxs %d is not multiple of the number of target devices %d.' %(nxs, s.ngpu))
				sys.exit()
			# Floor division keeps the per-GPU sizes integral on Python 3 as
			# well; the divisibility check above guarantees no remainder.
			s.nxs = [nxs // s.ngpu] * s.ngpu
			s.nx_gpu = nxs
		else:
			print('Error: nxs type %s is invalid.' %type(nxs))
			sys.exit()

		s.ny, s.nz = ny, nz
		s.check_grid_size()
		s.allocations()
		s.get_program(print_source=False)
示例#3
0
	def __init__(s, nxs, ny, nz, target_device='all', print_verbose=True):
		"""Pick the target devices, partition the x-axis and run the setup steps.

		Args:
			nxs: x-grid size. Either a list with one entry per partition
				(``s.nnx`` entries), or a single int giving the total size.
			ny: grid size stored as ``s.ny``.
			nz: grid size stored as ``s.nz``.
			target_device: one of
				``'cpu'``  - no GPU is used (``s.ngpu = 0``);
				``'gpuN'`` - only GPU device N, with a context and queues
				created for that device alone;
				``'gpu'``  - all GPU devices;
				``'all'``  - CPU plus all GPU devices (the default).
				When no GPU device is found only ``'all'`` and ``'cpu'`` are
				accepted, and both mean CPU only.
			print_verbose: when true, print device information and the
				selected configuration.

		Raises:
			SystemExit: on an invalid ``target_device``, on an ``nxs`` that
				is neither a list nor an int, or on a list whose length
				differs from ``s.nnx``.
		"""
		s.print_verbose = print_verbose
		s.gpu_devices = utils.get_gpu_devices(s.print_verbose)
		if s.print_verbose:
			utils.print_gpu_info(s.gpu_devices)
			utils.print_cpu_info()
		ngpu_dev = len(s.gpu_devices)

		s.lsize = 256
		s.gsizes = []
		s.nnx = 1
		s.ngpu = ngpu_dev
		s.context, s.queues = utils.create_context_queues(s.gpu_devices)
		td = target_device
		single_gpu_names = ['gpu%d' % i for i in range(ngpu_dev)]

		if ngpu_dev > 0:
			s.gsizes = [utils.get_optimal_global_work_size(device) for device in s.gpu_devices]

			if td == 'cpu':
				s.ngpu = 0
				target_str = 'CPU'
			elif td in single_gpu_names:
				s.ngpu = 1
				gpu_num = int(td[len('gpu'):])
				s.gsizes = [s.gsizes[gpu_num]]
				s.gpu_devices = [s.gpu_devices[gpu_num]]
				# Rebuild the context so it only covers the selected device.
				s.context, s.queues = utils.create_context_queues(s.gpu_devices)
				target_str = 'Single GPU #%d' % gpu_num
			elif td == 'gpu':
				s.nnx = ngpu_dev
				target_str = '%d GPUs' % s.ngpu
			elif td == 'all':
				s.nnx = ngpu_dev + 1
				target_str = 'CPU + %d GPUs' % s.ngpu
			else:
				print('Error: Invalid target_device option.')
				print('      Possible options: %s' %(['all', 'cpu', 'gpu'] + single_gpu_names))
				sys.exit()
		else:
			if td in ['all', 'cpu']:
				s.nnx = 1
				s.ngpu = 0
				target_str = 'CPU'
			else:
				print('Error: Invalid target_device option.')
				print('      There are no GPU devices.')
				print('      Possible options: %s' %(['all', 'cpu']))
				sys.exit()

		if isinstance(nxs, list):
			if len(nxs) != s.nnx:
				print('Error: len(nxs) %d is not matched with the number of target devices %d.' %(len(nxs), s.nnx))
				sys.exit()
			s.nxs = nxs
			s.nx_total = np.array(nxs).sum()
		elif isinstance(nxs, int):
			s.nx_total = nxs
			if s.nnx == 1:
				s.nxs = [nxs]
			else:
				#s.nxs = utils.get_optimal_nxs()
				# Floor division keeps the sizes integral on Python 3 as well.
				# NOTE(review): the split uses s.ngpu entries, so with 'all'
				# (s.nnx == s.ngpu + 1) the CPU gets no share, and any
				# remainder of nxs / s.ngpu is dropped - confirm intent.
				s.nxs = [nxs // s.ngpu] * s.ngpu
		else:
			print('Error: nxs type %s is invalid.' % type(nxs))
			print('      Possible types: %s' %(['list', 'int']))
			sys.exit()

		if s.print_verbose:
			print('Target Device : %s' % target_str)
			print('s.nnx = %d' % s.nnx)
			print('s.ngpu = %d' % s.ngpu)
			print('s.nxs = %s' % s.nxs)
			print('')

		s.ny, s.nz = ny, nz
		s.check_grid_size()
		s.allocations()
		s.get_program(print_ksource=False)
		s.prepare_updates()
def get_dt(func, args, tmax=1000):
	"""Return the mean wall-clock time, in seconds, of one ``func(*args)`` call.

	Args:
		func: callable to time.
		args: sequence of positional arguments unpacked into each call.
		tmax: number of back-to-back calls to average over (default 1000).

	Returns:
		Elapsed time divided by ``tmax``, as a float number of seconds.

	Raises:
		ValueError: if ``tmax`` is smaller than 1.
	"""
	if tmax < 1:
		raise ValueError('tmax must be a positive integer')

	t0 = datetime.now()
	for _ in range(tmax):
		func(*args)
	elapsed = datetime.now() - t0

	# total_seconds() also accounts for the timedelta's `days` field, which a
	# manual seconds + microseconds sum leaves out.
	return elapsed.total_seconds() / tmax


# Host<->device transfer benchmark setup (only the start of the script is
# visible here; the timed transfers themselves are not shown).
# Each buffer holds 2*nx*nx float32 values, i.e. 72.0 KiB for nx=96 up to
# 1.76 MiB for nx=480.
nxs = range(96, 480+1, 32)	# nx**2, 72.0 KiB ~ 1.76 MiB
gpu_devices = utils.get_gpu_devices()
utils.print_gpu_info(gpu_devices)
context, queues = utils.create_context_queues(gpu_devices)
# Only the first command queue is used below.
queue = queues[0]
# `cl` is presumably pyopencl - confirm against the file's imports.
mf = cl.mem_flags

# Column headers of the results table (tab separated).
print('nx\tdtoh\t\thtod\t\tdtoh(pinned)\thtod(pinned)')
for nx in nxs:
#for nx in [480]:
	# Trailing comma: with the Python 2 print statement this suppresses the
	# newline; on Python 3 it is a no-op tuple and a newline is printed.
	print(nx),
	# Random float32 host data, and a device buffer created with
	# COPY_HOST_PTR so that it is initialised from `f`.
	f = np.random.rand(2*nx*nx).astype(np.float32)
	dbuf = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=f)
	# Zero-filled host array with the same shape and dtype as `f`.
	hbuf = np.zeros(f.shape, dtype=f.dtype)

	# dtoh (pageable)
	# Disabled sanity check: read the buffer back and compare it with `f`.
	#cl.enqueue_read_buffer(queue, dbuf, hbuf)
	#print(np.linalg.norm(f - hbuf))
	#assert np.linalg.norm(f - hbuf) == 0
def get_dt(func, args, tmax=1000):
    """Time ``func(*args)`` and return the average seconds per call.

    Args:
        func: callable to time.
        args: sequence of positional arguments unpacked into each call.
        tmax: how many consecutive calls are timed (default 1000).

    Returns:
        Total elapsed wall-clock time divided by ``tmax``, as a float.

    Raises:
        ValueError: if ``tmax`` is smaller than 1.
    """
    if tmax < 1:
        raise ValueError('tmax must be a positive integer')

    started = datetime.now()
    for _ in range(tmax):
        func(*args)
    elapsed = datetime.now() - started

    # total_seconds() includes the timedelta's `days` component, unlike
    # adding `seconds` and `microseconds` by hand.
    return elapsed.total_seconds() / tmax


# Host<->device transfer benchmark setup (only the start of the script is
# visible here; the timed transfers themselves are not shown).
# Each buffer holds 2*nx*nx float32 values, i.e. 72.0 KiB for nx=96 up to
# 1.76 MiB for nx=480.
nxs = range(96, 480 + 1, 32)  # nx**2, 72.0 KiB ~ 1.76 MiB
gpu_devices = utils.get_gpu_devices()
utils.print_gpu_info(gpu_devices)
context, queues = utils.create_context_queues(gpu_devices)
# Only the first command queue is used below.
queue = queues[0]
# `cl` is presumably pyopencl - confirm against the file's imports.
mf = cl.mem_flags

# Column headers of the results table (tab separated).
print('nx\tdtoh\t\thtod\t\tdtoh(pinned)\thtod(pinned)')
for nx in nxs:
    #for nx in [480]:
    # Trailing comma: with the Python 2 print statement this suppresses the
    # newline; on Python 3 it is a no-op tuple and a newline is printed.
    print(nx),
    # Random float32 host data, and a device buffer created with
    # COPY_HOST_PTR so that it is initialised from `f`.
    f = np.random.rand(2 * nx * nx).astype(np.float32)
    dbuf = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=f)
    # Zero-filled host array with the same shape and dtype as `f`.
    hbuf = np.zeros(f.shape, dtype=f.dtype)

    # dtoh (pageable)
    # Disabled sanity check: read the buffer back and compare it with `f`.
    #cl.enqueue_read_buffer(queue, dbuf, hbuf)
    #print(np.linalg.norm(f - hbuf))
    #assert np.linalg.norm(f - hbuf) == 0
示例#6
0
    def __init__(s, nxs, ny, nz, target_device='all', print_verbose=True):
        """Pick the target devices, partition the x-axis and run the setup steps.

        Args:
            nxs: x-grid size. Either a list with one entry per partition
                (``s.nnx`` entries), or a single int giving the total size.
            ny: grid size stored as ``s.ny``.
            nz: grid size stored as ``s.nz``.
            target_device: one of
                ``'cpu'``  - no GPU is used (``s.ngpu = 0``);
                ``'gpuN'`` - only GPU device N, with a context and queues
                created for that device alone;
                ``'gpu'``  - all GPU devices;
                ``'all'``  - CPU plus all GPU devices (the default).
                When no GPU device is found only ``'all'`` and ``'cpu'`` are
                accepted, and both mean CPU only.
            print_verbose: when true, print device information and the
                selected configuration.

        Raises:
            SystemExit: on an invalid ``target_device``, on an ``nxs`` that
                is neither a list nor an int, or on a list whose length
                differs from ``s.nnx``.
        """
        s.print_verbose = print_verbose
        s.gpu_devices = utils.get_gpu_devices(s.print_verbose)
        if s.print_verbose:
            utils.print_gpu_info(s.gpu_devices)
            utils.print_cpu_info()
        ngpu_dev = len(s.gpu_devices)

        s.lsize = 256
        s.gsizes = []
        s.nnx = 1
        s.ngpu = ngpu_dev
        s.context, s.queues = utils.create_context_queues(s.gpu_devices)
        td = target_device
        single_gpu_names = ['gpu%d' % i for i in range(ngpu_dev)]

        if ngpu_dev > 0:
            s.gsizes = [
                utils.get_optimal_global_work_size(device)
                for device in s.gpu_devices
            ]

            if td == 'cpu':
                s.ngpu = 0
                target_str = 'CPU'
            elif td in single_gpu_names:
                s.ngpu = 1
                gpu_num = int(td[len('gpu'):])
                s.gsizes = [s.gsizes[gpu_num]]
                s.gpu_devices = [s.gpu_devices[gpu_num]]
                # Rebuild the context so it only covers the selected device.
                s.context, s.queues = utils.create_context_queues(
                    s.gpu_devices)
                target_str = 'Single GPU #%d' % gpu_num
            elif td == 'gpu':
                s.nnx = ngpu_dev
                target_str = '%d GPUs' % s.ngpu
            elif td == 'all':
                s.nnx = ngpu_dev + 1
                target_str = 'CPU + %d GPUs' % s.ngpu
            else:
                print('Error: Invalid target_device option.')
                print('      Possible options: %s' %
                      (['all', 'cpu', 'gpu'] + single_gpu_names))
                sys.exit()
        else:
            if td in ['all', 'cpu']:
                s.nnx = 1
                s.ngpu = 0
                target_str = 'CPU'
            else:
                print('Error: Invalid target_device option.')
                print('      There are no GPU devices.')
                print('      Possible options: %s' % (['all', 'cpu']))
                sys.exit()

        if isinstance(nxs, list):
            if len(nxs) != s.nnx:
                print(
                    'Error: len(nxs) %d is not matched with the number of target devices %d.'
                    % (len(nxs), s.nnx))
                sys.exit()
            s.nxs = nxs
            s.nx_total = np.array(nxs).sum()
        elif isinstance(nxs, int):
            s.nx_total = nxs
            if s.nnx == 1:
                s.nxs = [nxs]
            else:
                #s.nxs = utils.get_optimal_nxs()
                # Floor division keeps the sizes integral on Python 3 as well.
                # NOTE(review): the split uses s.ngpu entries, so with 'all'
                # (s.nnx == s.ngpu + 1) the CPU gets no share, and any
                # remainder of nxs / s.ngpu is dropped - confirm intent.
                s.nxs = [nxs // s.ngpu] * s.ngpu
        else:
            print('Error: nxs type %s is invalid.' % type(nxs))
            print('      Possible types: %s' % (['list', 'int']))
            sys.exit()

        if s.print_verbose:
            print('Target Device : %s' % target_str)
            print('s.nnx = %d' % s.nnx)
            print('s.ngpu = %d' % s.ngpu)
            print('s.nxs = %s' % s.nxs)
            print('')

        s.ny, s.nz = ny, nz
        s.check_grid_size()
        s.allocations()
        s.get_program(print_ksource=False)
        s.prepare_updates()