def init_gpu(self):
    """Create the CUDA context and initialise GPU-side and camera state.

    Reads ``self.device_id``, ``self.geometry``, ``self.size`` and
    ``self.max_alpha_depth``; every other attribute assigned below is
    created (or reset) by this method.
    """
    self.context = gpu.create_cuda_context(self.device_id)
    self.gpu_geometry = gpu.GPUGeometry(self.geometry)
    self.gpu_funcs = gpu.GPUFuncs(gpu.get_cu_module('mesh.h'))
    self.hybrid_funcs = gpu.GPUFuncs(gpu.get_cu_module('hybrid_render.cu'))
    self.gpu_geometries = [self.gpu_geometry]

    self.width, self.height = self.size
    self.npixels = self.width * self.height

    self.clock = pygame.time.Clock()

    # The HUD overlay is an optional extra: doom_mode only becomes True once
    # the image has been loaded and positioned successfully.
    self.doom_mode = False
    if self.width == 640:
        try:
            # SECRET DOOM MODE!
            # Parenthesised form prints the same text under Python 2 and
            # is also valid syntax under Python 3.
            print('shotgun activated!')
            self.doom_hud = pygame.image.load(
                'images/doomhud.png').convert_alpha()
            rect = self.doom_hud.get_rect()
            # Anchor the HUD to the bottom edge of the window.
            self.doom_rect = rect.move(0, self.height - rect.height)
            self.doom_mode = True
        except Exception:
            # Best effort: a missing or unreadable image just leaves
            # doom_mode off.  Unlike a bare ``except:``, this lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

    lower_bound, upper_bound = self.geometry.mesh.get_bounds()

    self.mesh_diagonal_norm = np.linalg.norm(upper_bound - lower_bound)

    self.scale = self.mesh_diagonal_norm

    self.motion = 'coarse'

    self.nblocks = 64

    # Initial camera point: one mesh diagonal away along -y, at the
    # mid-height (z) of the mesh bounds.
    self.point = np.array([
        0,
        -self.mesh_diagonal_norm,
        (lower_bound[2] + upper_bound[2]) / 2,
    ])

    self.axis1 = np.array([0, 0, 1], float)
    self.axis2 = np.array([1, 0, 0], float)

    self.film_width = 35.0  # mm

    # ``direction`` rather than ``dir`` so the builtin is not shadowed.
    pos, direction = from_film(self.point, axis1=self.axis1,
                               axis2=self.axis2, size=self.size,
                               width=self.film_width)

    self.rays = gpu.GPURays(pos, direction,
                            max_alpha_depth=self.max_alpha_depth)

    # One uint32 per pixel.
    self.pixels_gpu = ga.empty(self.npixels, dtype=np.uint32)

    self.movie = False
    self.movie_index = 0
    self.movie_dir = None
    self.hybrid_render = False
def intersect(gpu_geometry, number=100, nphotons=500000,
              nthreads_per_block=64, max_blocks=1024):
    """Return the average number of ray intersections per second.

    Runs the ``distance_to_mesh`` kernel ``number`` times on ``nphotons``
    rays starting at the origin with directions drawn from
    ``sample.uniform_sphere``, timing each launch with ``time.time()``.
    The first launch is excluded from the statistics.

    The result is ``nphotons`` divided by a ``ufloat`` built from the mean
    and standard deviation of the recorded run times.

    ``max_blocks`` is accepted but not used by this function.
    """
    distances_gpu = ga.empty(nphotons, dtype=np.float32)

    cu_module = gpu.get_cu_module('mesh.h', options=('--use_fast_math', ))
    kernels = gpu.GPUFuncs(cu_module)

    timings = []

    for trial in tools.progress(list(range(number))):
        origins = ga.zeros(nphotons, dtype=ga.vec.float3)
        host_directions = sample.uniform_sphere(nphotons)
        # Reorder the directions with tools.argsort_direction before upload.
        order = tools.argsort_direction(host_directions)
        directions = ga.to_gpu(gpu.to_float3(host_directions[order]))

        started = time.time()
        kernels.distance_to_mesh(
            np.int32(origins.size),
            origins,
            directions,
            gpu_geometry.gpudata,
            distances_gpu,
            block=(nthreads_per_block, 1, 1),
            grid=(origins.size // nthreads_per_block + 1, 1),
        )
        # Wait for the kernel to finish so the wall-clock time covers it.
        cuda.Context.get_current().synchronize()
        duration = time.time() - started

        if not trial > 0:
            # first kernel call incurs some driver overhead
            continue
        timings.append(duration)

    return nphotons / ufloat((np.mean(timings), np.std(timings)))