def concatenate_layers(layers):
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)
    # Put 0 at beginning of list
    layer_bounds = np.insert(np.cumsum([len(l) for l in layers]), 0, 0)
    nodes = ga.empty(shape=int(layer_bounds[-1]), dtype=ga.vec.uint4)
    nthreads_per_block = 256

    for layer_start, layer_end, layer in zip(layer_bounds[:-1],
                                             layer_bounds[1:],
                                             layers):
        if layer_end == layer_bounds[-1]:
            # leaf nodes need no offset
            child_offset = 0
        else:
            child_offset = layer_end

        for first_index, elements_this_iter, nblocks_this_iter in \
                chunk_iterator(layer_end - layer_start, nthreads_per_block,
                               max_blocks=10000):
            bvh_funcs.copy_and_offset(np.uint32(first_index),
                                      np.uint32(elements_this_iter),
                                      np.uint32(child_offset),
                                      cuda.In(layer),
                                      nodes[layer_start:],
                                      block=(nthreads_per_block, 1, 1),
                                      grid=(nblocks_this_iter, 1))

    return nodes.get(), layer_bounds
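# Runnable illustration (hedged, not part of the library) of the layer_bounds
# convention returned above: layer i of the flattened node array occupies
# nodes[layer_bounds[i]:layer_bounds[i+1]]. The per-layer node counts below
# are made up for the example.
def _example_layer_bounds():
    import numpy as np
    layer_lengths = [1, 4, 16]  # hypothetical layer sizes, root layer first
    bounds = np.insert(np.cumsum(layer_lengths), 0, 0)
    return bounds  # array([ 0,  1,  5, 21])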
def __init__(self, photons, ncopies=1, max_time=4.):
    """Load ``photons`` onto the GPU, replicating as requested.

    Args:
        - photons: chroma.Event.Photons
            Photon state information to load onto GPU
        - ncopies: int, *optional*
            Number of times to replicate the photons on the GPU.
            This is used if you want to propagate the same event
            many times, for example in a likelihood calculation.

            The amount of GPU storage will be proportionally
            larger if ncopies > 1, so be careful.
        - max_time: float, *optional*
            Maximum propagation time, stored as ``self.max_time``
            for use by the propagate_hit kernel.
    """
    module = get_cu_module('propagate_hit.cu', options=cuda_options)
    propagate_hit_kernel = module.get_function('propagate_hit')
    propagate_hit_kernel.prepare('iiPPPPPPPPPPPiiiPPP')
    self.propagate_hit_kernel = propagate_hit_kernel
    self.gpu_funcs = GPUFuncs(module)

    self.max_time = max_time
    self.ncopies = ncopies
    self.true_nphotons = len(photons)

    self.marshall_photons(photons, ncopies)
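# Hedged usage sketch (the owning class name and event attribute are assumed
# here, and an initialized CUDA context is required):
#
#   photons = event.photons_beg            # a chroma.event.Photons instance
#   gpu_photons = GPUPhotonsHit(photons, ncopies=2, max_time=4.)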
def merge_nodes_detailed(nodes, first_child, nchild):
    '''Merges nodes into len(first_child) parent nodes, using
    the provided arrays to determine the index of the first
    child of each parent, and how many children there are.'''
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    gpu_nodes = ga.to_gpu(nodes)
    gpu_first_child = ga.to_gpu(first_child.astype(np.int32))
    gpu_nchild = ga.to_gpu(nchild.astype(np.int32))

    nparent = len(first_child)
    gpu_parent_nodes = ga.empty(shape=nparent, dtype=ga.vec.uint4)
    nthreads_per_block = 256

    for first_index, elements_this_iter, nblocks_this_iter in \
            chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
        bvh_funcs.make_parents_detailed(np.uint32(first_index),
                                        np.uint32(elements_this_iter),
                                        gpu_nodes,
                                        gpu_parent_nodes,
                                        gpu_first_child,
                                        gpu_nchild,
                                        block=(nthreads_per_block, 1, 1),
                                        grid=(nblocks_this_iter, 1))

    return gpu_parent_nodes.get()
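# Hedged sketch of the (first_child, nchild) layout merge_nodes_detailed
# expects: parent i adopts children first_child[i] .. first_child[i]+nchild[i]-1.
# The values are illustrative only.
def _example_child_layout():
    import numpy as np
    first_child = np.array([0, 3, 5], dtype=np.int32)
    nchild = np.array([3, 2, 4], dtype=np.int32)
    # parent 0 -> children 0..2, parent 1 -> 3..4, parent 2 -> 5..8
    return first_child, nchild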
def __init__(self, pos, dir, max_alpha_depth=10, nblocks=64):
    self.pos = ga.to_gpu(to_float3(pos))
    self.dir = ga.to_gpu(to_float3(dir))
    self.max_alpha_depth = max_alpha_depth
    self.nblocks = nblocks

    transform_module = get_cu_module('transform.cu', options=cuda_options)
    self.transform_funcs = GPUFuncs(transform_module)

    render_module = get_cu_module('render.cu', options=cuda_options)
    self.render_funcs = GPUFuncs(render_module)

    self.dx = ga.empty(max_alpha_depth * self.pos.size, dtype=np.float32)
    self.color = ga.empty(self.dx.size, dtype=ga.vec.float4)
    self.dxlen = ga.zeros(self.pos.size, dtype=np.uint32)
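# Hedged note: dx and color are sized max_alpha_depth * npixels because alpha
# rendering keeps up to max_alpha_depth surface intersections per ray, while
# dxlen appears to count how many of those slots each ray actually filled.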
def __init__(self, photons, ncopies=1):
    """Load ``photons`` onto the GPU, replicating as requested.

    Args:
        - photons: chroma.Event.Photons
            Photon state information to load onto GPU
        - ncopies: int, *optional*
            Number of times to replicate the photons on the GPU.
            This is used if you want to propagate the same event
            many times, for example in a likelihood calculation.

            The amount of GPU storage will be proportionally
            larger if ncopies > 1, so be careful.
    """
    nphotons = len(photons)
    self.pos = ga.empty(shape=nphotons * ncopies, dtype=ga.vec.float3)
    self.dir = ga.empty(shape=nphotons * ncopies, dtype=ga.vec.float3)
    self.pol = ga.empty(shape=nphotons * ncopies, dtype=ga.vec.float3)
    self.wavelengths = ga.empty(shape=nphotons * ncopies, dtype=np.float32)
    self.t = ga.empty(shape=nphotons * ncopies, dtype=np.float32)
    self.last_hit_triangles = ga.empty(shape=nphotons * ncopies,
                                       dtype=np.int32)
    self.flags = ga.empty(shape=nphotons * ncopies, dtype=np.uint32)
    self.weights = ga.empty(shape=nphotons * ncopies, dtype=np.float32)

    # Assign the provided photons to the beginning (possibly
    # the entire array if ncopies is 1)
    self.pos[:nphotons].set(to_float3(photons.pos))
    self.dir[:nphotons].set(to_float3(photons.dir))
    self.pol[:nphotons].set(to_float3(photons.pol))
    self.wavelengths[:nphotons].set(photons.wavelengths.astype(np.float32))
    self.t[:nphotons].set(photons.t.astype(np.float32))
    self.last_hit_triangles[:nphotons].set(
        photons.last_hit_triangles.astype(np.int32))
    self.flags[:nphotons].set(photons.flags.astype(np.uint32))
    self.weights[:nphotons].set(photons.weights.astype(np.float32))

    module = get_cu_module('propagate.cu', options=cuda_options)
    self.gpu_funcs = GPUFuncs(module)

    # Replicate the photons to the rest of the slots if needed
    if ncopies > 1:
        max_blocks = 1024
        nthreads_per_block = 64
        for first_photon, photons_this_round, blocks in \
                chunk_iterator(nphotons, nthreads_per_block, max_blocks):
            self.gpu_funcs.photon_duplicate(np.int32(first_photon),
                                            np.int32(photons_this_round),
                                            self.pos, self.dir,
                                            self.wavelengths, self.pol,
                                            self.t, self.flags,
                                            self.last_hit_triangles,
                                            self.weights,
                                            np.int32(ncopies - 1),
                                            np.int32(nphotons),
                                            block=(nthreads_per_block, 1, 1),
                                            grid=(blocks, 1))

    # Save the duplication information for the iterate_copies() method
    self.true_nphotons = nphotons
    self.ncopies = ncopies
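# CPU analogue (hedged) of the duplication layout produced by photon_duplicate:
# copy c of photon j appears to land at flat index j + c*nphotons, i.e.
# whole-array repetition rather than interleaving.
def _example_duplicate_layout(values, ncopies):
    import numpy as np
    return np.tile(np.asarray(values), ncopies)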
def __init__(self, gpu_detector, ndaq=1):
    self.earliest_time_gpu = ga.empty(gpu_detector.nchannels * ndaq,
                                      dtype=np.float32)
    self.earliest_time_int_gpu = ga.empty(gpu_detector.nchannels * ndaq,
                                          dtype=np.uint32)
    self.channel_history_gpu = ga.zeros_like(self.earliest_time_int_gpu)
    self.channel_q_int_gpu = ga.zeros_like(self.earliest_time_int_gpu)
    self.channel_q_gpu = ga.zeros(len(self.earliest_time_int_gpu),
                                  dtype=np.float32)
    self.detector_gpu = gpu_detector.detector_gpu
    self.solid_id_map_gpu = gpu_detector.solid_id_map
    self.solid_id_to_channel_index_gpu = \
        gpu_detector.solid_id_to_channel_index_gpu

    self.module = get_cu_module('daq.cu', options=cuda_options,
                                include_source_directory=True)
    self.gpu_funcs = GPUFuncs(self.module)

    self.ndaq = ndaq
    self.stride = gpu_detector.nchannels
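# Hedged helper (illustrative, not part of the module) mirroring the flat
# nchannels*ndaq layout allocated above: channel c of independent DAQ copy d
# would live at index c + d*stride, with stride = nchannels.
def _flat_channel_index(channel, daq_copy, nchannels):
    return channel + daq_copy * nchannels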
def area_sort_nodes(gpu_geometry, layer_bounds):
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    # (start, end) spans for each layer, with the leaf layer dropped,
    # processed bottom-up
    bounds = list(zip(layer_bounds[:-1], layer_bounds[1:]))[:-1]
    bounds.reverse()

    nthreads_per_block = 256
    for start, end in bounds:
        bvh_funcs.area_sort_child(np.uint32(start), np.uint32(end),
                                  gpu_geometry,
                                  block=(nthreads_per_block, 1, 1),
                                  grid=(120, 1))

    return gpu_geometry.nodes.get()
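# Runnable illustration (hedged) of the per-layer (start, end) spans iterated
# above: the leaf layer is dropped and the remaining layers run bottom-up.
def _example_layer_pairs():
    layer_bounds = [0, 1, 5, 21]  # illustrative, as from concatenate_layers()
    pairs = list(zip(layer_bounds[:-1], layer_bounds[1:]))[:-1]
    pairs.reverse()
    return pairs  # [(1, 5), (0, 1)]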
def color_solids(self, solid_hit, colors, nblocks_per_thread=64,
                 max_blocks=1024):
    solid_hit_gpu = ga.to_gpu(np.array(solid_hit, dtype=bool))
    solid_colors_gpu = ga.to_gpu(np.array(colors, dtype=np.uint32))

    module = get_cu_module('mesh.h', options=cuda_options)
    color_solids = module.get_function('color_solids')

    for first_triangle, triangles_this_round, blocks in \
            chunk_iterator(self.triangles.size, nblocks_per_thread,
                           max_blocks):
        color_solids(np.int32(first_triangle),
                     np.int32(triangles_this_round),
                     self.solid_id_map,
                     solid_hit_gpu,
                     solid_colors_gpu,
                     self.gpudata,
                     block=(nblocks_per_thread, 1, 1),
                     grid=(blocks, 1))
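# Hedged usage sketch (assumes a loaded GPU geometry `gpu_geometry` with
# `nsolids` solids; `hit_solid_ids` is a hypothetical array of solid IDs):
# paint the hit solids red and leave the rest untouched.
#
#   solid_hit = np.zeros(nsolids, dtype=bool)
#   solid_hit[hit_solid_ids] = True
#   colors = np.full(nsolids, 0xFF0000, dtype=np.uint32)
#   gpu_geometry.color_solids(solid_hit, colors)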
def collapse_chains(nodes, layer_bounds):
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    gpu_nodes = ga.to_gpu(nodes)

    # Same bottom-up, leaf-layer-excluded traversal as area_sort_nodes()
    bounds = list(zip(layer_bounds[:-1], layer_bounds[1:]))[:-1]
    bounds.reverse()

    nthreads_per_block = 256
    for start, end in bounds:
        bvh_funcs.collapse_child(np.uint32(start), np.uint32(end),
                                 gpu_nodes,
                                 block=(nthreads_per_block, 1, 1),
                                 grid=(120, 1))

    return gpu_nodes.get()
def __init__(self, pos, dir, pol, wavelengths, t, last_hit_triangles,
             flags, weights):
    '''Create new object using slices of GPUArrays from an instance
    of GPUPhotons. NOTE THESE ARE NOT CPU ARRAYS!'''
    self.pos = pos
    self.dir = dir
    self.pol = pol
    self.wavelengths = wavelengths
    self.t = t
    self.last_hit_triangles = last_hit_triangles
    self.flags = flags
    self.weights = weights

    module = get_cu_module('propagate.cu', options=cuda_options)
    self.gpu_funcs = GPUFuncs(module)

    self.true_nphotons = len(pos)
    self.ncopies = 1
def __init__(self, photons, ncopies=1, copy_flags=True,
             copy_triangles=True, copy_weights=True):
    """Load ``photons`` onto the GPU, replicating as requested.

    Args:
        - photons: chroma.Event.Photons
            Photon state information to load onto GPU
        - ncopies: int, *optional*
            Number of times to replicate the photons on the GPU.
            This is used if you want to propagate the same event
            many times, for example in a likelihood calculation.

            The amount of GPU storage will be proportionally
            larger if ncopies > 1, so be careful.
        - copy_flags, copy_triangles, copy_weights: bool, *optional*
            If False, skip copying the corresponding field from
            ``photons`` and initialize it to its default instead
            (zero flags, -1 last-hit triangles, unit weights).
    """
    nphotons = len(photons)
    self.pos = ga.empty(shape=nphotons * ncopies, dtype=ga.vec.float3)
    self.dir = ga.empty(shape=nphotons * ncopies, dtype=ga.vec.float3)
    self.pol = ga.empty(shape=nphotons * ncopies, dtype=ga.vec.float3)
    self.wavelengths = ga.empty(shape=nphotons * ncopies, dtype=np.float32)
    self.t = ga.empty(shape=nphotons * ncopies, dtype=np.float32)
    self.last_hit_triangles = ga.empty(shape=nphotons * ncopies,
                                       dtype=np.int32)
    if not copy_triangles:
        self.last_hit_triangles.fill(-1)
    if not copy_flags:
        self.flags = ga.zeros(shape=nphotons * ncopies, dtype=np.uint32)
    else:
        self.flags = ga.empty(shape=nphotons * ncopies, dtype=np.uint32)
    if not copy_weights:
        self.weights = ga.ones_like(self.last_hit_triangles,
                                    dtype=np.float32)
    else:
        self.weights = ga.empty(shape=nphotons * ncopies, dtype=np.float32)
    self.evidx = ga.empty(shape=nphotons, dtype=np.uint32)

    # Assign the provided photons to the beginning (possibly
    # the entire array if ncopies is 1)
    self.pos[:nphotons].set(to_float3(photons.pos))
    self.dir[:nphotons].set(to_float3(photons.dir))
    self.pol[:nphotons].set(to_float3(photons.pol))
    self.wavelengths[:nphotons].set(photons.wavelengths.astype(np.float32))
    self.t[:nphotons].set(photons.t.astype(np.float32))
    if copy_triangles:
        self.last_hit_triangles[:nphotons].set(
            photons.last_hit_triangles.astype(np.int32))
    if copy_flags:
        self.flags[:nphotons].set(photons.flags.astype(np.uint32))
    if copy_weights:
        self.weights[:nphotons].set(photons.weights.astype(np.float32))
    self.evidx[:nphotons].set(photons.evidx.astype(np.uint32))

    module = get_cu_module('propagate.cu', options=cuda_options)
    self.gpu_funcs = GPUFuncs(module)

    # Replicate the photons to the rest of the slots if needed
    if ncopies > 1:
        max_blocks = 1024
        nthreads_per_block = 64
        for first_photon, photons_this_round, blocks in \
                chunk_iterator(nphotons, nthreads_per_block, max_blocks):
            self.gpu_funcs.photon_duplicate(np.int32(first_photon),
                                            np.int32(photons_this_round),
                                            self.pos, self.dir,
                                            self.wavelengths, self.pol,
                                            self.t, self.flags,
                                            self.last_hit_triangles,
                                            self.weights, self.evidx,
                                            np.int32(ncopies - 1),
                                            np.int32(nphotons),
                                            block=(nthreads_per_block, 1, 1),
                                            grid=(blocks, 1))

    # Save the duplication information for the iterate_copies() method
    self.true_nphotons = nphotons
    self.ncopies = ncopies
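# Hedged usage sketch (the owning class name is assumed here): for freshly
# generated photons the flag/triangle/weight fields start at their defaults,
# so the copies can be skipped.
#
#   gpu_photons = GPUPhotons(photons, ncopies=4, copy_flags=False,
#                            copy_triangles=False, copy_weights=False)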
def merge_nodes(nodes, degree, max_ratio=None):
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    nparent = len(nodes) // degree
    if len(nodes) % degree != 0:
        nparent += 1

    if nparent == 1:
        nparent_pad = nparent
    else:
        nparent_pad = round_up_to_multiple(nparent, 1)  # degree
    gpu_parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4)

    nthreads_per_block = 256
    for first_index, elements_this_iter, nblocks_this_iter in \
            chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
        bvh_funcs.make_parents(np.uint32(first_index),
                               np.uint32(elements_this_iter),
                               np.uint32(degree),
                               gpu_parent_nodes,
                               cuda.In(nodes),
                               np.uint32(0),
                               np.uint32(len(nodes)),
                               block=(nthreads_per_block, 1, 1),
                               grid=(nblocks_this_iter, 1))

    parent_nodes = gpu_parent_nodes.get()

    if max_ratio is not None:
        areas = node_areas(parent_nodes)
        child_areas = node_areas(nodes)

        # Flag parents whose bounding box is much larger than the union of
        # their children's boxes
        excessive_area = np.zeros(shape=len(areas), dtype=bool)
        for i, parent_area in enumerate(areas):
            nchild = parent_nodes['w'][i] >> CHILD_BITS
            child_index = parent_nodes['w'][i] & ~NCHILD_MASK
            child_area = child_areas[child_index:child_index + nchild].sum()
            if child_area / parent_area < 0.3:
                excessive_area[i] = True

        extra_slots = round_up_to_multiple(
            (degree - 1) * np.count_nonzero(excessive_area), 1)
        print('Extra slots:', extra_slots)

        new_parent_nodes = np.zeros(shape=len(parent_nodes) + extra_slots,
                                    dtype=parent_nodes.dtype)
        new_parent_nodes[:len(parent_nodes)] = parent_nodes

        offset = 0
        for count, index in enumerate(np.argwhere(excessive_area)):
            index = index[0] + offset
            nchild = new_parent_nodes['w'][index] >> CHILD_BITS
            child_index = new_parent_nodes['w'][index] & ~NCHILD_MASK

            # Promote the first child into the parent slot, pointing it at
            # the children appended past the original node array
            new_parent_nodes[index] = nodes[child_index]
            tmp_nchild = new_parent_nodes['w'][index] >> CHILD_BITS
            tmp_child_index = new_parent_nodes['w'][index] & ~NCHILD_MASK
            new_parent_nodes['w'][index] = \
                tmp_nchild << CHILD_BITS | (tmp_child_index + len(nodes))

            if nchild == 1:
                continue

            # slide everyone over
            new_parent_nodes[index + nchild:] = \
                new_parent_nodes[index + 1:-nchild + 1]
            offset += nchild - 1

            for sibling in range(nchild - 1):
                new_parent_index = index + 1 + sibling
                new_parent_nodes[new_parent_index] = \
                    nodes[child_index + sibling + 1]
                if new_parent_nodes['x'][new_parent_index] != 0:
                    tmp_nchild = \
                        new_parent_nodes['w'][new_parent_index] >> CHILD_BITS
                    tmp_child_index = \
                        new_parent_nodes['w'][new_parent_index] & ~NCHILD_MASK
                    new_parent_nodes['w'][new_parent_index] = \
                        tmp_nchild << CHILD_BITS | \
                        (tmp_child_index + len(nodes))

        print('old: %e' % node_areas(parent_nodes).max())
        print('new: %e' % node_areas(new_parent_nodes).max())

        if len(new_parent_nodes) < len(nodes):
            # Only adopt the new set of parent nodes if it actually reduces
            # the total number of nodes at this level
            parent_nodes = new_parent_nodes

    return parent_nodes
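# Pure-Python illustration (hedged) of the 'w'-field packing decoded above:
# the child count sits in the bits above CHILD_BITS and the first-child index
# in the bits below. The real CHILD_BITS/NCHILD_MASK values come from the
# surrounding module; child_bits=26 here is only a stand-in.
def _example_pack_unpack(child_index, nchild, child_bits=26):
    nchild_mask = ~((1 << child_bits) - 1)
    w = (nchild << child_bits) | child_index
    assert w >> child_bits == nchild
    assert w & ~nchild_mask == child_index
    return w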
def create_leaf_nodes(mesh, morton_bits=16, round_to_multiple=1):
    '''Compute the leaf nodes surrounding a triangle mesh.

      ``mesh``: chroma.geometry.Mesh
        Triangles to box
      ``morton_bits``: int
        Number of bits to use per dimension when computing Morton code.
      ``round_to_multiple``: int
        Round the number of nodes created up to multiple of this number.
        Extra nodes will be all zero.

    Returns (world_coords, nodes, morton_codes), where
      ``world_coords``: chroma.bvh.WorldCoords
        Defines the fixed point coordinate system
      ``nodes``: ndarray(shape=len(mesh.triangles), dtype=uint4)
        List of leaf nodes. Child IDs will be set to triangle offsets.
      ``morton_codes``: ndarray(shape=len(mesh.triangles), dtype=np.uint64)
        Morton codes for each triangle, using ``morton_bits`` per axis.
        Must be <= 16 bits.
    '''
    # Load GPU functions
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    # Compute world coordinates
    world_origin = mesh.vertices.min(axis=0)
    world_scale = np.max((mesh.vertices.max(axis=0) - world_origin)) \
        / (2**16 - 2)
    world_coords = WorldCoords(world_origin=world_origin,
                               world_scale=world_scale)

    # Put triangles and vertices in mapped host memory
    triangles = mapped_empty(shape=len(mesh.triangles), dtype=ga.vec.uint3,
                             write_combined=True)
    triangles[:] = to_uint3(mesh.triangles)
    vertices = mapped_empty(shape=len(mesh.vertices), dtype=ga.vec.float3,
                            write_combined=True)
    vertices[:] = to_float3(mesh.vertices)

    # Call GPU to compute nodes
    nodes = ga.zeros(shape=round_up_to_multiple(len(triangles),
                                                round_to_multiple),
                     dtype=ga.vec.uint4)
    morton_codes = ga.empty(shape=len(triangles), dtype=np.uint64)

    # Convert world coords to GPU-friendly types
    world_origin = ga.vec.make_float3(*world_origin)
    world_scale = np.float32(world_scale)

    nthreads_per_block = 256
    for first_index, elements_this_iter, nblocks_this_iter in \
            chunk_iterator(len(triangles), nthreads_per_block,
                           max_blocks=30000):
        bvh_funcs.make_leaves(np.uint32(first_index),
                              np.uint32(elements_this_iter),
                              Mapped(triangles), Mapped(vertices),
                              world_origin, world_scale,
                              nodes, morton_codes,
                              block=(nthreads_per_block, 1, 1),
                              grid=(nblocks_this_iter, 1))

    morton_codes_host = morton_codes.get() >> (16 - morton_bits)

    return world_coords, nodes.get(), morton_codes_host
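# Pure-NumPy sketch (hedged) of the fixed-point quantization implied by the
# world coordinates above: vertex positions map onto a uint16 grid with step
# world_scale, anchored at world_origin. `vertices` is any (N, 3) float array.
def _example_quantize(vertices):
    import numpy as np
    vertices = np.asarray(vertices, dtype=np.float64)
    world_origin = vertices.min(axis=0)
    world_scale = np.max(vertices.max(axis=0) - world_origin) / (2**16 - 2)
    return ((vertices - world_origin) / world_scale).astype(np.uint16)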
def optimize_layer(orig_nodes):
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    nodes = ga.to_gpu(orig_nodes)
    n = len(nodes)
    areas = ga.empty(shape=n // 2, dtype=np.uint64)
    nthreads_per_block = 128

    min_areas = ga.empty(shape=int(np.ceil(n / float(nthreads_per_block))),
                         dtype=np.uint64)
    min_index = ga.empty(shape=min_areas.shape, dtype=np.uint32)

    update = 10000

    skip_size = 1
    flag = mapped_empty(shape=skip_size, dtype=np.uint32)

    i = 0
    skips = 0
    swaps = 0
    while i < n // 2 - 1:
        # How are we doing?
        if i % update == 0:
            for first_index, elements_this_iter, nblocks_this_iter in \
                    chunk_iterator(n // 2, nthreads_per_block,
                                   max_blocks=10000):
                bvh_funcs.pair_area(np.uint32(first_index),
                                    np.uint32(elements_this_iter),
                                    nodes,
                                    areas,
                                    block=(nthreads_per_block, 1, 1),
                                    grid=(nblocks_this_iter, 1))

            areas_host = areas.get()
            print('Area of parent layer so far (%d): %1.12e'
                  % (i * 2, areas_host.astype(float).sum()))
            print('Skips: %d, Swaps: %d' % (skips, swaps))

        test_index = i * 2

        blocks = 0
        look_forward = min(8192 * 50, n - test_index - 2)
        skip_this_round = min(skip_size, n - test_index - 1)
        flag[:] = 0
        for first_index, elements_this_iter, nblocks_this_iter in \
                chunk_iterator(look_forward, nthreads_per_block,
                               max_blocks=10000):
            bvh_funcs.min_distance_to(np.uint32(first_index + test_index + 2),
                                      np.uint32(elements_this_iter),
                                      np.uint32(test_index),
                                      nodes,
                                      np.uint32(blocks),
                                      min_areas,
                                      min_index,
                                      Mapped(flag),
                                      block=(nthreads_per_block, 1, 1),
                                      grid=(nblocks_this_iter,
                                            skip_this_round))
            blocks += nblocks_this_iter

        cuda.Context.get_current().synchronize()

        if flag[0] == 0:
            flag_nonzero = flag.nonzero()[0]
            if len(flag_nonzero) == 0:
                no_swap_required = skip_size
            else:
                no_swap_required = flag_nonzero[0]
            i += no_swap_required
            skips += no_swap_required
            continue

        min_areas_host = min_areas[:blocks].get()
        min_index_host = min_index[:blocks].get()
        best_block = min_areas_host.argmin()
        better_i = min_index_host[best_block]

        swaps += 1
        assert 0 < better_i < len(nodes)
        assert 0 < test_index + 1 < len(nodes)
        bvh_funcs.swap(np.uint32(test_index + 1), np.uint32(better_i),
                       nodes, block=(1, 1, 1), grid=(1, 1))
        cuda.Context.get_current().synchronize()
        i += 1

    for first_index, elements_this_iter, nblocks_this_iter in \
            chunk_iterator(n // 2, nthreads_per_block, max_blocks=10000):
        bvh_funcs.pair_area(np.uint32(first_index),
                            np.uint32(elements_this_iter),
                            nodes,
                            areas,
                            block=(nthreads_per_block, 1, 1),
                            grid=(nblocks_this_iter, 1))

    areas_host = areas.get()
    print('Final area of parent layer: %1.12e' % areas_host.sum())
    print('Skips: %d, Swaps: %d' % (skips, swaps))

    return nodes.get()
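# Minimal CPU analogue (hedged) of the per-pair objective optimize_layer
# appears to minimize: the surface area of the axis-aligned box enclosing
# both nodes of a sibling pair. Inputs are 3-element lower/upper corners.
def _pair_bound_area(lower_a, upper_a, lower_b, upper_b):
    import numpy as np
    lower = np.minimum(lower_a, lower_b)
    upper = np.maximum(upper_a, upper_b)
    dx, dy, dz = upper - lower
    return 2.0 * (dx * dy + dy * dz + dz * dx)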
def __init__(self):
    self.module = get_cu_module('pdf.cu', options=cuda_options,
                                include_source_directory=True)
    self.gpu_funcs = GPUFuncs(self.module)