def build_simple_bvh(degree):
    """Build a simple BVH for the companion-cube model and report layer areas.

    The mesh from ``chroma.models.companioncube()`` is handed to
    ``make_simple_bvh`` together with ``degree``.  For each layer of the
    resulting tree one line is printed: the layer number followed by the
    summed ``node_areas`` of that layer multiplied by the square of
    ``world_coords.world_scale``.

    Nothing is returned.
    """
    # chroma.models.lionsolid() is the alternative mesh this was tried with.
    bvh = make_simple_bvh(chroma.models.companioncube(), degree)

    all_nodes = bvh.nodes
    # Layer start offsets plus a closing sentinel, so that consecutive
    # entries delimit one layer each.
    edges = np.append(bvh.layer_offsets, len(all_nodes))
    coords = bvh.world_coords

    for layer, first in enumerate(edges[:-1]):
        last = edges[layer + 1]
        layer_area = node_areas(all_nodes[first:last]).sum() \
            * coords.world_scale**2
        print('%s %s' % (layer, layer_area))
def build_simple_bvh(degree):
    """Build a simple BVH for the lion-solid model and check its type.

    ``make_simple_bvh`` is run on ``chroma.models.lionsolid()`` with the
    given ``degree``.  One line per tree layer is printed (layer number and
    the layer's summed ``node_areas`` times ``world_scale`` squared), and
    finally the result is asserted to be a ``BVH`` instance.
    """
    lion = chroma.models.lionsolid()
    bvh = make_simple_bvh(lion, degree)

    tree_nodes = bvh.nodes
    # Append the total node count so every layer has an explicit end offset.
    bounds = np.append(bvh.layer_offsets, len(tree_nodes))
    coords = bvh.world_coords

    layer_spans = zip(bounds[:-1], bounds[1:])
    for depth, span in enumerate(layer_spans):
        begin, end = span
        total_area = node_areas(tree_nodes[begin:end]).sum()
        print('%s %s' % (depth, total_area * coords.world_scale**2))

    assert isinstance(bvh, BVH)
def build_simple_bvh(degree):
    """Build a recursive-grid BVH for the companion cube and return it.

    The mesh comes from ``chroma.models.companioncube()`` and the tree from
    ``make_recursive_grid_bvh``, which is also asked to save its Morton
    codes to ``mortonout.cu.nudot.txt``.  For every layer the layer number
    and the summed ``node_areas`` (scaled by ``world_scale`` squared) are
    printed.

    Returns the BVH object produced by ``make_recursive_grid_bvh``.
    """
    # Alternatives that were used here before: chroma.models.lionsolid() as
    # the mesh, and make_simple_bvh(mesh, degree) as the builder.
    cube = chroma.models.companioncube()
    bvh = make_recursive_grid_bvh(
        cube, degree, save_morton_codes="mortonout.cu.nudot.txt")

    node_array = bvh.nodes
    # Start offsets of each layer followed by the overall node count.
    cut_points = np.append(bvh.layer_offsets, len(node_array))
    coords = bvh.world_coords

    layer_no = 0
    for lo, hi in zip(cut_points[:-1], cut_points[1:]):
        scaled_area = node_areas(node_array[lo:hi]).sum() \
            * coords.world_scale**2
        print('%s %s' % (layer_no, scaled_area))
        layer_no += 1

    return bvh
def _split_sparse_parents(nodes, parent_nodes, degree):
    """Replace sparsely filled parents by their children (CPU post-pass).

    A parent is flagged when the summed area of its children is less than
    0.3 of the parent's own area.  Each flagged parent is replaced, in a
    widened copy of ``parent_nodes``, by copies of its child nodes.  The
    widened array is returned only if it is still shorter than ``nodes``;
    otherwise ``parent_nodes`` is returned unchanged.
    """
    nnodes = len(nodes)

    def rebased(word):
        # Same packed word, with the child index advanced by len(nodes).
        # NOTE(review): presumably the consumer treats indices >= len(nodes)
        # specially for promoted nodes -- confirm against the layer builder.
        count = int(word >> CHILD_BITS)
        first = int(word & ~NCHILD_MASK)
        return count << CHILD_BITS | (first + nnodes)

    areas = node_areas(parent_nodes)
    child_areas = node_areas(nodes)

    excessive_area = np.zeros(shape=len(areas), dtype=bool)
    for i, parent_area in enumerate(areas):
        # 'w' packs the child count above CHILD_BITS and the index of the
        # first child in the remaining bits.
        nchild = int(parent_nodes['w'][i] >> CHILD_BITS)
        child_index = int(parent_nodes['w'][i] & ~NCHILD_MASK)
        child_area = child_areas[child_index:child_index + nchild].sum()
        if child_area / parent_area < 0.3:
            excessive_area[i] = True

    extra_slots = round_up_to_multiple(
        (degree - 1) * np.count_nonzero(excessive_area), 1)
    print('Extra slots: %s' % (extra_slots,))

    new_parent_nodes = np.zeros(shape=len(parent_nodes) + extra_slots,
                                dtype=parent_nodes.dtype)
    new_parent_nodes[:len(parent_nodes)] = parent_nodes

    # Number of positions by which earlier promotions have already shifted
    # the not-yet-visited parents to the right.
    offset = 0
    for (flagged,) in np.argwhere(excessive_area):
        index = int(flagged) + offset
        # Plain Python ints: negating an unsigned NumPy scalar for the slice
        # bound below would wrap around instead of going negative.
        nchild = int(new_parent_nodes['w'][index] >> CHILD_BITS)
        child_index = int(new_parent_nodes['w'][index] & ~NCHILD_MASK)

        # The first child takes over the parent's slot.
        new_parent_nodes[index] = nodes[child_index]
        new_parent_nodes['w'][index] = rebased(new_parent_nodes['w'][index])

        if nchild == 1:
            continue

        # Slide everyone after this slot over by nchild - 1 to make room for
        # the remaining children.  Source and destination overlap, hence the
        # explicit copy.
        new_parent_nodes[index + nchild:] = \
            new_parent_nodes[index + 1:1 - nchild].copy()
        offset += nchild - 1

        for sibling in range(1, nchild):
            new_parent_index = index + sibling
            new_parent_nodes[new_parent_index] = nodes[child_index + sibling]
            # NOTE(review): x == 0 presumably marks an empty/padding node
            # whose child word must stay untouched -- confirm.
            if new_parent_nodes['x'][new_parent_index] != 0:
                new_parent_nodes['w'][new_parent_index] = rebased(
                    new_parent_nodes['w'][new_parent_index])

    print('old: %e' % node_areas(parent_nodes).max())
    print('new: %e' % node_areas(new_parent_nodes).max())

    if len(new_parent_nodes) < nnodes:
        # Only adopt new set of parent nodes if it actually reduces the
        # total number of nodes at this level by 1.
        return new_parent_nodes
    return parent_nodes


def merge_nodes(nodes, degree, max_ratio=None):
    """Create the parent layer for ``nodes`` with the ``make_parents`` kernel.

    Parameters:
        nodes: array of child nodes; every ``degree`` consecutive entries
            are merged into one parent.
        degree: number of children per parent.
        max_ratio: when not ``None``, parents whose children cover less than
            0.3 of the parent area are replaced by their children.
            NOTE(review): only ``max_ratio is not None`` is tested; the
            value itself is never read and the 0.3 cutoff is fixed.

    Returns:
        The parent nodes copied back from the device, possibly widened by
        the sparse-parent pass.

    Raises:
        RuntimeError: if the active GPU API is neither CUDA nor OpenCL.
    """
    nthreads_per_block = 256

    use_cuda = gpuapi.is_gpu_api_cuda()
    use_opencl = gpuapi.is_gpu_api_opencl()
    if not (use_cuda or use_opencl):
        raise RuntimeError('API is neither CUDA nor OpenCL?!')

    context = None
    queue = None
    if use_opencl:
        # Relies on the most recently created context ("don't like the last
        # context method", per the original author).
        context = cltools.get_last_context()
        queue = cl.CommandQueue(context)

    # Load GPU functions (CUDA wins if both APIs report active, as before).
    if use_cuda:
        bvh_module = get_module('bvh.cu', options=api_options,
                                include_source_directory=True)
    else:
        bvh_module = get_module('bvh.cl', context, options=api_options,
                                include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    # Number of parents = ceil(len(nodes) / degree), in pure integer math so
    # the result is an int under true division as well.
    nparent, leftover = divmod(len(nodes), degree)
    if leftover != 0:
        nparent += 1

    if nparent == 1:
        nparent_pad = nparent
    else:
        # Padding to a multiple of `degree` is disabled (multiple of 1).
        nparent_pad = round_up_to_multiple(nparent, 1)

    if use_cuda:
        gpu_parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4)
        for first_index, elements_this_iter, nblocks_this_iter in \
                chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
            bvh_funcs.make_parents(np.uint32(first_index),
                                   np.uint32(elements_this_iter),
                                   np.uint32(degree),
                                   gpu_parent_nodes,
                                   cuda.In(nodes),
                                   np.uint32(0),
                                   np.uint32(len(nodes)),
                                   block=(nthreads_per_block, 1, 1),
                                   grid=(nblocks_this_iter, 1))
    else:
        # Sized with nparent_pad so both backends allocate the same length.
        parent_nodes_np = np.zeros(shape=nparent_pad, dtype=ga.vec.uint4)
        gpu_parent_nodes = ga.to_device(queue, parent_nodes_np)
        gpu_nodes = ga.to_device(queue, nodes)
        for first_index, elements_this_iter, nblocks_this_iter in \
                chunk_iterator(nparent, nthreads_per_block, max_blocks=1):
            bvh_funcs.make_parents(queue, (elements_this_iter, 1, 1), None,
                                   np.uint32(first_index),
                                   np.uint32(elements_this_iter),
                                   np.uint32(degree),
                                   gpu_parent_nodes.data,
                                   gpu_nodes.data,
                                   np.uint32(0),
                                   np.uint32(len(nodes))).wait()

    parent_nodes = gpu_parent_nodes.get()

    if max_ratio is not None:
        parent_nodes = _split_sparse_parents(nodes, parent_nodes, degree)

    return parent_nodes
def merge_nodes(nodes, degree, max_ratio=None):
    """Create the parent layer for ``nodes`` with the CUDA ``make_parents`` kernel.

    Parameters:
        nodes: array of child nodes; every ``degree`` consecutive entries
            are merged into one parent.
        degree: number of children per parent.
        max_ratio: when not ``None``, parents whose children cover less than
            0.3 of the parent area are replaced by their children.
            NOTE(review): only ``max_ratio is not None`` is tested; the
            value itself is never read and the 0.3 cutoff is fixed.

    Returns:
        The parent nodes copied back from the device, or a widened array in
        which sparse parents have been replaced by their children.
    """
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    nnodes = len(nodes)

    # ceil(nnodes / degree) without relying on Python 2 integer division.
    nparent, leftover = divmod(nnodes, degree)
    if leftover != 0:
        nparent += 1

    if nparent == 1:
        nparent_pad = nparent
    else:
        # Padding to a multiple of `degree` is disabled (multiple of 1).
        nparent_pad = round_up_to_multiple(nparent, 1)
    gpu_parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4)

    nthreads_per_block = 256
    for first_index, elements_this_iter, nblocks_this_iter in \
            chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
        bvh_funcs.make_parents(np.uint32(first_index),
                               np.uint32(elements_this_iter),
                               np.uint32(degree),
                               gpu_parent_nodes,
                               cuda.In(nodes),
                               np.uint32(0),
                               np.uint32(nnodes),
                               block=(nthreads_per_block, 1, 1),
                               grid=(nblocks_this_iter, 1))

    parent_nodes = gpu_parent_nodes.get()

    if max_ratio is None:
        return parent_nodes

    def rebased(word):
        # Same packed word, with the child index advanced by len(nodes).
        # NOTE(review): presumably the consumer treats indices >= len(nodes)
        # specially for promoted nodes -- confirm against the layer builder.
        count = int(word >> CHILD_BITS)
        first = int(word & ~NCHILD_MASK)
        return count << CHILD_BITS | (first + nnodes)

    areas = node_areas(parent_nodes)
    child_areas = node_areas(nodes)

    # Flag parents whose children cover less than 0.3 of the parent's area.
    excessive_area = np.zeros(shape=len(areas), dtype=bool)
    for i, parent_area in enumerate(areas):
        # 'w' packs the child count above CHILD_BITS and the index of the
        # first child in the remaining bits.
        nchild = int(parent_nodes['w'][i] >> CHILD_BITS)
        child_index = int(parent_nodes['w'][i] & ~NCHILD_MASK)
        child_area = child_areas[child_index:child_index + nchild].sum()
        if child_area / parent_area < 0.3:
            excessive_area[i] = True

    extra_slots = round_up_to_multiple(
        (degree - 1) * np.count_nonzero(excessive_area), 1)
    print('Extra slots: %s' % (extra_slots,))

    new_parent_nodes = np.zeros(shape=len(parent_nodes) + extra_slots,
                                dtype=parent_nodes.dtype)
    new_parent_nodes[:len(parent_nodes)] = parent_nodes

    # Positions by which earlier promotions have shifted later parents.
    offset = 0
    for (flagged,) in np.argwhere(excessive_area):
        index = int(flagged) + offset
        # Plain Python ints: negating an unsigned NumPy scalar for the slice
        # bound below would wrap around instead of going negative.
        nchild = int(new_parent_nodes['w'][index] >> CHILD_BITS)
        child_index = int(new_parent_nodes['w'][index] & ~NCHILD_MASK)

        # The first child takes over the parent's slot.
        new_parent_nodes[index] = nodes[child_index]
        new_parent_nodes['w'][index] = rebased(new_parent_nodes['w'][index])

        if nchild == 1:
            continue

        # Slide everyone over to make room for the other children; the two
        # slices overlap, so copy the source explicitly first.
        new_parent_nodes[index + nchild:] = \
            new_parent_nodes[index + 1:1 - nchild].copy()
        offset += nchild - 1

        for sibling in range(1, nchild):
            new_parent_index = index + sibling
            new_parent_nodes[new_parent_index] = nodes[child_index + sibling]
            # NOTE(review): x == 0 presumably marks an empty/padding node
            # whose child word must stay untouched -- confirm.
            if new_parent_nodes['x'][new_parent_index] != 0:
                new_parent_nodes['w'][new_parent_index] = rebased(
                    new_parent_nodes['w'][new_parent_index])

    print('old: %e' % node_areas(parent_nodes).max())
    print('new: %e' % node_areas(new_parent_nodes).max())

    if len(new_parent_nodes) < nnodes:
        # Only adopt new set of parent nodes if it actually reduces the
        # total number of nodes at this level by 1.
        parent_nodes = new_parent_nodes

    return parent_nodes
def _find_sparse_parents(nodes, parent_nodes):
    """Return a boolean mask of parents much larger than their children.

    Entry ``i`` is True when the summed ``node_areas`` of parent ``i``'s
    children is less than 0.3 of the parent's own area.
    """
    parent_areas = node_areas(parent_nodes)
    child_areas = node_areas(nodes)

    sparse = np.zeros(shape=len(parent_areas), dtype=bool)
    for i, parent_area in enumerate(parent_areas):
        # 'w' packs the child count above CHILD_BITS and the index of the
        # first child in the remaining bits.
        word = parent_nodes['w'][i]
        nchild = int(word >> CHILD_BITS)
        first_child = int(word & ~NCHILD_MASK)
        covered = child_areas[first_child:first_child + nchild].sum()
        if covered / parent_area < 0.3:
            sparse[i] = True
    return sparse


def merge_nodes(nodes, degree, max_ratio=None):
    """Merge groups of ``degree`` nodes into parents using the CUDA kernel.

    Parameters:
        nodes: array of child nodes to be grouped.
        degree: number of consecutive children merged into one parent.
        max_ratio: when not ``None``, parents flagged as sparse (children
            covering less than 0.3 of the parent area) are replaced by
            their children.
            NOTE(review): only ``max_ratio is not None`` is tested; the
            value itself is never read and the 0.3 cutoff is fixed.

    Returns:
        The parent node array read back from the device, or the widened
        array with promoted children if that array is still shorter than
        ``nodes``.
    """
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    total_children = len(nodes)

    # Integer ceiling division; stays an int under true division too.
    nparent, leftover = divmod(total_children, degree)
    if leftover != 0:
        nparent += 1

    # Rounding to a multiple of `degree` is disabled (multiple of 1).
    nparent_pad = nparent if nparent == 1 else round_up_to_multiple(nparent, 1)
    gpu_parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4)

    nthreads_per_block = 256
    launches = chunk_iterator(nparent, nthreads_per_block, max_blocks=10000)
    for first_index, elements_this_iter, nblocks_this_iter in launches:
        bvh_funcs.make_parents(np.uint32(first_index),
                               np.uint32(elements_this_iter),
                               np.uint32(degree),
                               gpu_parent_nodes,
                               cuda.In(nodes),
                               np.uint32(0),
                               np.uint32(total_children),
                               block=(nthreads_per_block, 1, 1),
                               grid=(nblocks_this_iter, 1))

    parent_nodes = gpu_parent_nodes.get()

    if max_ratio is not None:
        excessive_area = _find_sparse_parents(nodes, parent_nodes)

        extra_slots = round_up_to_multiple(
            (degree - 1) * np.count_nonzero(excessive_area), 1)
        print('Extra slots: %s' % (extra_slots,))

        new_parent_nodes = np.zeros(shape=len(parent_nodes) + extra_slots,
                                    dtype=parent_nodes.dtype)
        new_parent_nodes[:len(parent_nodes)] = parent_nodes

        # How far earlier promotions have pushed the remaining parents.
        offset = 0
        for hit in np.argwhere(excessive_area):
            index = int(hit[0]) + offset
            # Cast to Python ints so that `1 - nchild` below is a genuine
            # negative slice bound rather than a wrapped unsigned value.
            nchild = int(new_parent_nodes['w'][index] >> CHILD_BITS)
            child_index = int(new_parent_nodes['w'][index] & ~NCHILD_MASK)

            # The first child takes over the parent's slot, then its own
            # child index is advanced by len(nodes).
            # NOTE(review): presumably the consumer treats indices
            # >= len(nodes) specially for promoted nodes -- confirm.
            new_parent_nodes[index] = nodes[child_index]
            tmp_nchild = int(new_parent_nodes['w'][index] >> CHILD_BITS)
            tmp_child_index = int(new_parent_nodes['w'][index] & ~NCHILD_MASK)
            new_parent_nodes['w'][index] = tmp_nchild << CHILD_BITS | (
                tmp_child_index + total_children)

            if nchild == 1:
                continue

            # Slide everyone over; source and destination overlap, so take
            # a copy of the source first.
            tail = new_parent_nodes[index + 1:1 - nchild].copy()
            new_parent_nodes[index + nchild:] = tail
            offset += nchild - 1

            for sibling in range(nchild - 1):
                new_parent_index = index + 1 + sibling
                new_parent_nodes[new_parent_index] = \
                    nodes[child_index + sibling + 1]
                # NOTE(review): x == 0 presumably marks an empty/padding
                # node whose child word must stay untouched -- confirm.
                if new_parent_nodes['x'][new_parent_index] != 0:
                    tmp_nchild = int(
                        new_parent_nodes['w'][new_parent_index] >> CHILD_BITS)
                    tmp_child_index = int(
                        new_parent_nodes['w'][new_parent_index] & ~NCHILD_MASK)
                    new_parent_nodes['w'][new_parent_index] = \
                        tmp_nchild << CHILD_BITS | (
                            tmp_child_index + total_children)

        print('old: %e' % node_areas(parent_nodes).max())
        print('new: %e' % node_areas(new_parent_nodes).max())

        if len(new_parent_nodes) < total_children:
            # Only adopt new set of parent nodes if it actually reduces the
            # total number of nodes at this level by 1.
            parent_nodes = new_parent_nodes

    return parent_nodes