Exemplos de get_optimal_global_work_size em Python, exemplos de utils.get_optimal_global_work_size em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: fdtd3d_gpu_cpu_non-pinned.py Projeto: xj361685640/fdtd_accelerate

    def __init__(s, nxs, ny, nz, target_device='all', print_device_info=True):
        """Discover the GPU devices, split the x-axis among them and set up.

        Args:
            nxs: Either a list holding one x-size per GPU device, or a single
                int total x-size that must be a multiple of the GPU count.
            ny: Grid size along y, stored as ``s.ny``.
            nz: Grid size along z, stored as ``s.nz``.
            target_device: When an int, the index into the GPU device list of
                the device used to pick the global work size; any other value
                (including the default ``'all'``) uses device 0.
            print_device_info: When true, print GPU and CPU information.

        Raises:
            SystemExit: After printing an error message, when ``nxs`` has an
                unsupported type, a list length different from the GPU count,
                or an int value that is not a multiple of the GPU count.
        """
        s.gpu_devices = utils.get_gpu_devices()
        if print_device_info:
            utils.print_gpu_info(s.gpu_devices)
            utils.print_cpu_info()

        s.context, s.queues = utils.create_context_queues(s.gpu_devices)
        s.ngpu = len(s.gpu_devices)
        s.Ls = 256

        # The previous check `target_device == int` compared the argument with
        # the `int` type object itself, so an integer index was never honored.
        if isinstance(target_device, int):
            work_size_device = s.gpu_devices[target_device]
        else:
            work_size_device = s.gpu_devices[0]
        s.Gs = utils.get_optimal_global_work_size(work_size_device)

        if isinstance(nxs, list):
            if len(nxs) != s.ngpu:
                print(
                    'Error: len(nxs) %d is not matched with the number of target devices %d.'
                    % (len(nxs), s.ngpu))
                sys.exit()
            s.nxs = nxs
            s.nx_gpu = np.array(nxs).sum()
        elif isinstance(nxs, int):
            if nxs % s.ngpu != 0:
                print(
                    'Error: nxs %d is not multiple of the number of target devices %d.'
                    % (nxs, s.ngpu))
                sys.exit()
            # Floor division keeps the per-device sizes integral on both
            # Python 2 and Python 3 (divisibility was checked just above).
            s.nxs = [nxs // s.ngpu] * s.ngpu
            s.nx_gpu = nxs
        else:
            print('Error: nxs type %s is invalid.' % type(nxs))
            sys.exit()

        s.ny, s.nz = ny, nz
        s.check_grid_size()
        s.allocations()
        s.get_program(print_source=False)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: fdtd3d_gpu_cpu_non-pinned.py Projeto: wbkifun/fdtd_accelerate

	def __init__(s, nxs, ny, nz, target_device='all', print_device_info=True):
		"""Discover the GPU devices, split the x-axis among them and set up.

		Args:
			nxs: Either a list holding one x-size per GPU device, or a single
				int total x-size that must be a multiple of the GPU count.
			ny: Grid size along y, stored as ``s.ny``.
			nz: Grid size along z, stored as ``s.nz``.
			target_device: When an int, the index into the GPU device list of
				the device used to pick the global work size; any other value
				(including the default ``'all'``) uses device 0.
			print_device_info: When true, print GPU and CPU information.

		Raises:
			SystemExit: After printing an error message, when ``nxs`` has an
				unsupported type, a list length different from the GPU count,
				or an int value that is not a multiple of the GPU count.
		"""
		s.gpu_devices = utils.get_gpu_devices()
		if print_device_info:
			utils.print_gpu_info(s.gpu_devices)
			utils.print_cpu_info()

		s.context, s.queues = utils.create_context_queues(s.gpu_devices)
		s.ngpu = len(s.gpu_devices)
		s.Ls = 256

		# The previous check `target_device == int` compared the argument with
		# the `int` type object itself, so an integer index was never honored.
		if isinstance(target_device, int):
			work_size_device = s.gpu_devices[target_device]
		else:
			work_size_device = s.gpu_devices[0]
		s.Gs = utils.get_optimal_global_work_size(work_size_device)

		if isinstance(nxs, list):
			if len(nxs) != s.ngpu:
				print('Error: len(nxs) %d is not matched with the number of target devices %d.' %(len(nxs), s.ngpu))
				sys.exit()
			s.nxs = nxs
			s.nx_gpu = np.array(nxs).sum()
		elif isinstance(nxs, int):
			if nxs % s.ngpu != 0:
				print('Error: nxs %d is not multiple of the number of target devices %d.' %(nxs, s.ngpu))
				sys.exit()
			# Floor division keeps the per-device sizes integral on both
			# Python 2 and Python 3 (divisibility was checked just above).
			s.nxs = [nxs // s.ngpu] * s.ngpu
			s.nx_gpu = nxs
		else:
			print('Error: nxs type %s is invalid.' %type(nxs))
			sys.exit()

		s.ny, s.nz = ny, nz
		s.check_grid_size()
		s.allocations()
		s.get_program(print_source=False)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: fdtd3d.py Projeto: wbkifun/fdtd_accelerate

	def __init__(s, nxs, ny, nz, target_device='all', print_verbose=True):
		"""Select the target devices, split the x-axis and prepare the solver.

		Args:
			nxs: Either a list with one x-size per computing unit (its length
				must equal ``s.nnx``), or a single int total x-size.
			ny: Grid size along y, stored as ``s.ny``.
			nz: Grid size along z, stored as ``s.nz``.
			target_device: One of ``'all'``, ``'cpu'``, ``'gpu'`` or
				``'gpu<N>'`` where N indexes the detected GPU devices. Without
				any GPU device only ``'all'`` and ``'cpu'`` are accepted.
			print_verbose: When true, print device information and the
				resulting partition.

		Raises:
			SystemExit: After printing an error message, when
				``target_device`` is not a valid option or ``nxs`` has an
				unsupported type or a mismatching list length.
		"""
		s.print_verbose = print_verbose
		s.gpu_devices = utils.get_gpu_devices(s.print_verbose)
		if s.print_verbose:
			utils.print_gpu_info(s.gpu_devices)
			utils.print_cpu_info()
		ngpu_dev = len(s.gpu_devices)

		s.lsize = 256
		s.nnx = 1
		s.ngpu = ngpu_dev
		s.context, s.queues = utils.create_context_queues(s.gpu_devices)
		# One global work size per detected GPU (empty when there is none).
		s.gsizes = [utils.get_optimal_global_work_size(device) for device in s.gpu_devices]

		td = target_device
		# Valid single-GPU option names; the list position is the device index.
		gpu_names = ['gpu%d' % i for i in range(ngpu_dev)]
		if ngpu_dev > 0:
			if td == 'cpu':
				s.ngpu = 0
				target_str = 'CPU'
			elif td in gpu_names:
				s.ngpu = 1
				# Look the index up instead of `td.strip('gpu')`, which strips
				# a character set rather than the literal prefix.
				gpu_num = gpu_names.index(td)
				s.gsizes = [s.gsizes[gpu_num]]
				s.gpu_devices = [s.gpu_devices[gpu_num]]
				# Rebuild the context and queues for the single selected device.
				s.context, s.queues = utils.create_context_queues(s.gpu_devices)
				target_str = 'Single GPU #%d' % gpu_num
			elif td == 'gpu':
				s.nnx = ngpu_dev
				target_str = '%d GPUs' % s.ngpu
			elif td == 'all':
				s.nnx = ngpu_dev + 1
				target_str = 'CPU + %d GPUs' % s.ngpu
			else:
				print('Error: Invalid target_device option.')
				print('      Possible options: %s' %(['all', 'cpu', 'gpu'] + gpu_names))
				sys.exit()
		else:
			if td in ['all', 'cpu']:
				s.nnx = 1
				s.ngpu = 0
				target_str = 'CPU'
			else:
				print('Error: Invalid target_device option.')
				print('      There are no GPU devices.')
				print('      Possible options: %s' %(['all', 'cpu']))
				sys.exit()

		if isinstance(nxs, list):
			if len(nxs) != s.nnx:
				print('Error: len(nxs) %d is not matched with the number of target devices %d.' %(len(nxs), s.nnx))
				sys.exit()
			s.nxs = nxs
			s.nx_total = np.array(nxs).sum()
		elif isinstance(nxs, int):
			s.nx_total = nxs
			if s.nnx == 1:
				s.nxs = [nxs]
			else:
				# TODO(review): the original author left a disabled call to
				# utils.get_optimal_nxs() here; the even split is what runs.
				# NOTE(review): for target_device='all' s.nnx is ngpu_dev + 1,
				# yet only s.ngpu entries are produced, and any remainder of
				# nxs / s.ngpu is dropped - confirm against the callers.
				# Floor division keeps the sizes integral on Python 2 and 3.
				s.nxs = [nxs // s.ngpu] * s.ngpu
		else:
			print('Error: nxs type %s is invalid.' % type(nxs))
			print('      Possible types: %s' %(['list', 'int']))
			sys.exit()

		if s.print_verbose:
			print('Target Device : %s' % target_str)
			print('s.nnx = %d' % s.nnx)
			print('s.ngpu = %d' % s.ngpu)
			print('s.nxs = %s' % s.nxs)
			print('')

		s.ny, s.nz = ny, nz
		s.check_grid_size()
		s.allocations()
		s.get_program(print_ksource=False)
		s.prepare_updates()

Exemplo n.º 4

0

Exibir arquivo


if __name__ == '__main__':
    # Demo driver: build the single-GPU field/solver/source objects and open
    # an interactive plot window.
    # Grid size; the sizes noted are the original author's memory estimates.
    #nx, ny, nz = 240, 256, 256		# 540 MB
    nx, ny, nz = 512, 480, 480  # 3.96 GB
    #nx, ny, nz = 480, 480, 480		# 3.71 GB
    tmax, tgap = 200, 10
    gpu_id = 0  # index of the GPU device (and its queue) used for the run

    import utils
    gpu_devices = utils.get_gpu_devices()
    utils.print_gpu_info(gpu_devices)
    context, queues = utils.create_context_queues(gpu_devices)
    queue = queues[gpu_id]

    gs = utils.get_optimal_global_work_size(gpu_devices[gpu_id])
    emf = EMField3dGpu(context, queue, nx, ny, nz)
    fdtd = Fdtd3dSingleGpu(emf, gs)
    src = Fdtd3dSrcGpu(emf, 'ez', gs)

    # Plot
    import matplotlib.pyplot as plt
    plt.ion()

    f = np.ones((nx, ny, nz), 'f')
    # Show the slice at the middle z index. Floor division is required: on
    # Python 3 `nz / 2` is a float, which is not a valid array index.
    imsh = plt.imshow(f[:, :, nz // 2].T,
                      cmap=plt.cm.hot,
                      origin='lower',
                      vmin=0,
                      vmax=0.005)
    plt.colorbar()

Exemplo n.º 5

0

Exibir arquivo

Arquivo: fdtd3d.py Projeto: wbkifun/fdtd_accelerate

if __name__ == '__main__':
	# Demo driver: build the single-GPU field/solver/source objects and open
	# an interactive plot window.
	# Grid size; the sizes noted are the original author's memory estimates.
	#nx, ny, nz = 240, 256, 256		# 540 MB
	nx, ny, nz = 512, 480, 480		# 3.96 GB
	#nx, ny, nz = 480, 480, 480		# 3.71 GB
	tmax, tgap = 200, 10
	gpu_id = 0		# index of the GPU device (and its queue) used for the run


	import utils
	gpu_devices = utils.get_gpu_devices()
	utils.print_gpu_info(gpu_devices)
	context, queues = utils.create_context_queues(gpu_devices)
	queue = queues[gpu_id]

	gs = utils.get_optimal_global_work_size( gpu_devices[gpu_id] )
	emf = EMField3dGpu(context, queue, nx, ny, nz)
	fdtd = Fdtd3dSingleGpu(emf, gs)
	src = Fdtd3dSrcGpu(emf, 'ez', gs)


	# Plot
	import matplotlib.pyplot as plt
	plt.ion()

	f = np.ones((nx, ny, nz), 'f')
	# Show the slice at the middle z index. Floor division is required: on
	# Python 3 `nz/2` is a float, which is not a valid array index.
	imsh = plt.imshow(f[:,:,nz//2].T, cmap=plt.cm.hot, origin='lower', vmin=0, vmax=0.005)
	plt.colorbar()


	# Main loop

Exemplo n.º 6

0

Exibir arquivo

Arquivo: fdtd3d.py Projeto: xj361685640/fdtd_accelerate

    def __init__(s, nxs, ny, nz, target_device='all', print_verbose=True):
        """Select the target devices, split the x-axis and prepare the solver.

        Args:
            nxs: Either a list with one x-size per computing unit (its length
                must equal ``s.nnx``), or a single int total x-size.
            ny: Grid size along y, stored as ``s.ny``.
            nz: Grid size along z, stored as ``s.nz``.
            target_device: One of ``'all'``, ``'cpu'``, ``'gpu'`` or
                ``'gpu<N>'`` where N indexes the detected GPU devices. Without
                any GPU device only ``'all'`` and ``'cpu'`` are accepted.
            print_verbose: When true, print device information and the
                resulting partition.

        Raises:
            SystemExit: After printing an error message, when
                ``target_device`` is not a valid option or ``nxs`` has an
                unsupported type or a mismatching list length.
        """
        s.print_verbose = print_verbose
        s.gpu_devices = utils.get_gpu_devices(s.print_verbose)
        if s.print_verbose:
            utils.print_gpu_info(s.gpu_devices)
            utils.print_cpu_info()
        ngpu_dev = len(s.gpu_devices)

        s.lsize = 256
        s.nnx = 1
        s.ngpu = ngpu_dev
        s.context, s.queues = utils.create_context_queues(s.gpu_devices)
        # One global work size per detected GPU (empty when there is none).
        s.gsizes = [
            utils.get_optimal_global_work_size(device)
            for device in s.gpu_devices
        ]

        td = target_device
        # Valid single-GPU option names; the list position is the device index.
        gpu_names = ['gpu%d' % i for i in range(ngpu_dev)]
        if ngpu_dev > 0:
            if td == 'cpu':
                s.ngpu = 0
                target_str = 'CPU'
            elif td in gpu_names:
                s.ngpu = 1
                # Look the index up instead of `td.strip('gpu')`, which strips
                # a character set rather than the literal prefix.
                gpu_num = gpu_names.index(td)
                s.gsizes = [s.gsizes[gpu_num]]
                s.gpu_devices = [s.gpu_devices[gpu_num]]
                # Rebuild the context and queues for the single selected device.
                s.context, s.queues = utils.create_context_queues(
                    s.gpu_devices)
                target_str = 'Single GPU #%d' % gpu_num
            elif td == 'gpu':
                s.nnx = ngpu_dev
                target_str = '%d GPUs' % s.ngpu
            elif td == 'all':
                s.nnx = ngpu_dev + 1
                target_str = 'CPU + %d GPUs' % s.ngpu
            else:
                print('Error: Invalid target_device option.')
                print('      Possible options: %s' %
                      (['all', 'cpu', 'gpu'] + gpu_names))
                sys.exit()
        else:
            if td in ['all', 'cpu']:
                s.nnx = 1
                s.ngpu = 0
                target_str = 'CPU'
            else:
                print('Error: Invalid target_device option.')
                print('      There are no GPU devices.')
                print('      Possible options: %s' % (['all', 'cpu']))
                sys.exit()

        if isinstance(nxs, list):
            if len(nxs) != s.nnx:
                print(
                    'Error: len(nxs) %d is not matched with the number of target devices %d.'
                    % (len(nxs), s.nnx))
                sys.exit()
            s.nxs = nxs
            s.nx_total = np.array(nxs).sum()
        elif isinstance(nxs, int):
            s.nx_total = nxs
            if s.nnx == 1:
                s.nxs = [nxs]
            else:
                # TODO(review): the original author left a disabled call to
                # utils.get_optimal_nxs() here; the even split is what runs.
                # NOTE(review): for target_device='all' s.nnx is ngpu_dev + 1,
                # yet only s.ngpu entries are produced, and any remainder of
                # nxs / s.ngpu is dropped - confirm against the callers.
                # Floor division keeps the sizes integral on Python 2 and 3.
                s.nxs = [nxs // s.ngpu] * s.ngpu
        else:
            print('Error: nxs type %s is invalid.' % type(nxs))
            print('      Possible types: %s' % (['list', 'int']))
            sys.exit()

        if s.print_verbose:
            print('Target Device : %s' % target_str)
            print('s.nnx = %d' % s.nnx)
            print('s.ngpu = %d' % s.ngpu)
            print('s.nxs = %s' % s.nxs)
            print('')

        s.ny, s.nz = ny, nz
        s.check_grid_size()
        s.allocations()
        s.get_program(print_ksource=False)
        s.prepare_updates()