Python GPUFuncs.checknode примеры использования

Язык программирования: Python

Пространство имен/Пакет: chroma.gpu.gpufuncs

Класс/Тип: GPUFuncs

Метод/Функция: checknode

Примеров на hotexamples.com: 2

Python GPUFuncs.checknode — найдено 2 примера. Это лучшие примеры кода на Python для chroma.gpu.gpufuncs.GPUFuncs.checknode, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

GPUFuncs(19)

run_daq_many(2)

run_daq(2)

checknode(2)

get_earliest_hit_time(1)

test_sample_cdf(1)

swap(1)

reset_earliest_time_int(1)

propagate(1)

photon_duplicate(1)

pair_area(1)

min_distance_to(1)

make_parents_detailed(1)

make_parents(1)

make_leaves(1)

make_geostruct(1)

gen_photon_from_step(1)

accumulate_bincount(1)

fillArray(1)

count_photons(1)

copy_photons(1)

copy_and_offset(1)

convert_sortable_int_to_float(1)

convert_charge_int_to_float(1)

convert_adc(1)

collapse_child(1)

bin_hits(1)

area_sort_child(1)

accumulate_nearest_neighbor_block(1)

accumulate_moments(1)

accumulate_kernel_eval(1)

test_texture(1)

Пример #1

Показать файл

    def _call_opencl_kernel(self, sim, photons, ourphotons, max_shared_nodes,
                            nodes, workgroupsize, comqueue):
        module = get_module('wq_checknode.cl',
                            self.context,
                            options=api_options,
                            include_source_directory=True)
        gpu_funcs = GPUFuncs(module)

        # gather variables for kernel call
        gpugeo = sim.gpu_geometry
        photon_pos = photons.pos
        photon_dir = photons.dir
        photon_current_node = photons.current_node_index
        photon_tested_node = ga.to_device(
            comqueue, 1 * np.ones(len(photons.pos), dtype=np.uint32))
        photon_last_result = ga.to_device(
            comqueue, -1 * np.ones(len(photons.pos), dtype=np.int32))
        nodes = gpugeo.nodes
        node_parent = ga.to_device(comqueue,
                                   sim.detector.node_dsar_tree.parent)
        node_first_daughter = ga.to_device(
            comqueue, sim.detector.node_dsar_tree.first_daughter)
        node_sibling = ga.to_device(comqueue,
                                    sim.detector.node_dsar_tree.sibling)
        node_aunt = ga.to_device(comqueue, sim.detector.node_dsar_tree.aunt)
        world_origin = gpugeo.world_origin_gpu
        world_scale = gpugeo.world_scale
        # make queue related variables
        queue_size = np.int32(len(photons.pos) * 2)
        queue_photon_index = ga.empty(comqueue, queue_size, dtype=np.int32)
        queue_slot_flag = ga.zeros(comqueue, queue_size, dtype=np.int32)
        queue_photon_index[0:len(photons.pos)] = np.arange(0,
                                                           len(photons.pos),
                                                           dtype=np.int32)[:]
        queue_photon_index[len(photons.pos):] = (
            np.ones(len(photons.pos), dtype=np.int32) * -1)[:]
        queue_slot_flag[0:len(photons.pos)] = np.ones(len(photons.pos),
                                                      dtype=np.int32)[:]
        a = ga.zeros(comqueue, 1, dtype=ga.vec.uint4)
        b = np.array(1, dtype=np.int32)
        c = np.array(1, dtype=np.uint32)
        workgroup_photons = cl.LocalMemory(b.nbytes * workgroupsize)
        workgroup_current_node = cl.LocalMemory(b.nbytes * workgroupsize)
        workgroup_tested_node = cl.LocalMemory(b.nbytes * workgroupsize)

        max_nodes_can_store = (max_shared_nodes - 20 - 3 * workgroupsize)
        max_nodes_can_store -= max_nodes_can_store % 32
        max_nodes_can_store = np.int32(max_nodes_can_store)
        loaded_node_start_index = np.int32(0)
        loaded_node_end_index = np.int32(1)
        node_front_start = ga.empty(comqueue, 1, dtype=np.int32)
        node_front_end = ga.empty(comqueue, 1, dtype=np.int32)
        workgroup_nodes = cl.LocalMemory(a.nbytes * (max_nodes_can_store + 1))
        workgroup_daughter = cl.LocalMemory(c.nbytes *
                                            (max_nodes_can_store + 1))
        workgroup_sibling = cl.LocalMemory(c.nbytes *
                                           (max_nodes_can_store + 1))
        workgroup_aunt = cl.LocalMemory(c.nbytes * (max_nodes_can_store + 1))
        max_loops = 32

        if len(gpugeo.extra_nodes) > 1:
            raise RuntimeError('did not plan for there to be a node split.')

        print photon_current_node
        print photon_tested_node
        print queue_photon_index
        print queue_slot_flag

        print "Starting node range: ", loaded_node_start_index, " to ", loaded_node_end_index
        print "Max nodes in shared: ", max_nodes_can_store
        print "Work group nodes size: ", a.nbytes * workgroupsize, " bytes = (", a.nbytes, "*", workgroupsize, ")"
        print "Available local memsize: ", self.shared_mem_size
        print "Total number of nodes: ", len(
            nodes), " (", nodes.nbytes, " bytes)"
        print "Stored node size: ", max_nodes_can_store * a.nbytes
        print "Left over: ", self.shared_mem_size - max_nodes_can_store * a.nbytes - a.nbytes * workgroupsize
        print sim.detector.bvh.layer_bounds

        print "PRESUB CURRENT NODES"
        print photon_current_node
        print "PRESUB TESTED NODES"
        print photon_tested_node

        start_queue = time.time()
        gpu_funcs.checknode(
            comqueue, (workgroupsize, 1, 1), (workgroupsize, 1, 1),
            np.int32(max_loops), photon_pos.data, photon_dir.data,
            photon_current_node.data,
            photon_tested_node.data, photon_last_result.data,
            np.int32(len(nodes)), nodes.data, node_parent.data,
            node_first_daughter.data, node_sibling.data, node_aunt.data,
            world_origin.data, world_scale, queue_size,
            queue_photon_index.data, queue_slot_flag.data,
            np.int32(len(photon_pos)), np.int32(workgroupsize),
            workgroup_photons, workgroup_current_node, workgroup_tested_node,
            max_nodes_can_store, workgroup_nodes, workgroup_daughter,
            workgroup_sibling, workgroup_aunt, loaded_node_start_index,
            loaded_node_end_index, node_front_start.data,
            node_front_end.data).wait()
        end_queue = time.time()

        print "CheckNode Queue returns. ", end_queue - start_queue, " seconds"
        print "(Current node, To Test, result)"
        node_states = zip(photon_current_node.get(), photon_tested_node.get(),
                          photon_last_result.get())
        for x in xrange(0, len(node_states), 10):
            y = x + 10
            if y > len(node_states):
                y = len(node_states)
            print x, ": ", node_states[x:y]

        print "LAST RESULT:"
        print photon_last_result.get()

        print "PHOTON QUEUE"
        photon_queue = queue_photon_index.get()
        for x in xrange(0, len(photon_queue), 32):
            y = x + 32
            if y > len(photon_queue):
                y = len(photon_queue)
            print x, ": ", photon_queue[x:y]

        print "QUEUE SLOT FLAGS"
        slot_flags = queue_slot_flag.get()
        for x in xrange(0, len(slot_flags), 32):
            y = x + 32
            if y > len(slot_flags):
                y = len(slot_flags)
            print x, ": ", slot_flags[x:y]

        print "NODE FRONT: ", node_front_start.get(
        ), " to ", node_front_end.get(
        ), node_front_end.get() - node_front_start.get()
        return

Пример #2

Показать файл

    def _call_cuda_kernel(self, sim, photons, ourphotons, max_shared_nodes,
                          nodes, workgroupsize):
        module = get_module('wq_checknode.cu',
                            options=api_options,
                            include_source_directory=True)
        gpu_funcs = GPUFuncs(module)

        # gather variables for kernel call
        gpugeo = sim.gpu_geometry
        photon_pos = photons.pos
        photon_dir = photons.dir
        photon_current_node = photons.current_node_index
        photon_tested_node = ga.to_gpu(
            1 * np.ones(len(photons.pos), dtype=np.uint32))
        photon_last_result = ga.to_gpu(
            -1 * np.ones(len(photons.pos), dtype=np.int32))
        nodes = gpugeo.nodes
        node_parent = ga.to_gpu(sim.detector.node_dsar_tree.parent)
        node_first_daughter = ga.to_gpu(
            sim.detector.node_dsar_tree.first_daughter)
        node_sibling = ga.to_gpu(sim.detector.node_dsar_tree.sibling)
        node_aunt = ga.to_gpu(sim.detector.node_dsar_tree.aunt)
        world_origin = gpugeo.world_origin
        world_scale = gpugeo.world_scale

        # make queue related variables
        queue_size = np.int32(len(photons.pos) * 2)
        queue_photon_index = ga.empty(queue_size, dtype=np.int32)
        queue_slot_flag = ga.zeros(queue_size, dtype=np.int32)
        queue_photon_index[0:len(photons.pos)].set(
            np.arange(0, len(photons.pos), dtype=np.int32)[:])
        queue_photon_index[len(photons.pos):].set(
            -1 * np.ones(len(photons.pos), dtype=np.int32))
        queue_slot_flag[0:len(photons.pos)].set(
            np.ones(len(photons.pos), dtype=np.int32)[:])
        a = ga.zeros(1, dtype=ga.vec.uint4)
        b = np.array(1, dtype=np.int32)
        c = np.array(1, dtype=np.uint32)

        max_nodes_can_store = (max_shared_nodes - 20 - 3 * workgroupsize)
        max_nodes_can_store -= max_nodes_can_store % 32
        max_nodes_can_store = np.int32(max_nodes_can_store)

        loaded_node_start_index = np.int32(0)
        loaded_node_end_index = np.int32(1)
        node_front_start = ga.empty(1, dtype=np.int32)
        node_front_end = ga.empty(1, dtype=np.int32)

        max_loops = 1000

        if len(gpugeo.extra_nodes) > 1:
            raise RuntimeError('did not plan for there to be a node split.')

        print photon_current_node
        print photon_tested_node
        print queue_photon_index
        print queue_slot_flag

        print "Starting node range: ", loaded_node_start_index, " to ", loaded_node_end_index
        print "Max nodes in shared: ", max_nodes_can_store
        print "Work group nodes size: ", a.nbytes * workgroupsize, " bytes = (", a.nbytes, "*", workgroupsize, ")"
        print "Available local memsize: ", self.shared_mem_size
        print "Total number of nodes: ", len(
            nodes), " (", nodes.nbytes, " bytes)"
        print "Stored node size: ", max_nodes_can_store * a.nbytes
        print "Left over: ", self.shared_mem_size - max_nodes_can_store * a.nbytes - a.nbytes * workgroupsize
        print sim.detector.bvh.layer_bounds

        print "PRESUB CURRENT NODES"
        print photon_current_node
        print "PRESUB TESTED NODES"
        print photon_tested_node
        print "STARTING QUEUE"
        print queue_photon_index

        start_queue = time.time()
        gpu_funcs.checknode(np.int32(max_loops),
                            photon_pos,
                            photon_dir,
                            photon_current_node,
                            photon_tested_node,
                            photon_last_result,
                            np.int32(len(nodes)),
                            nodes,
                            node_parent,
                            node_first_daughter,
                            node_sibling,
                            node_aunt,
                            world_origin,
                            world_scale,
                            queue_size,
                            queue_photon_index,
                            queue_slot_flag,
                            np.int32(len(photon_pos)),
                            max_nodes_can_store,
                            loaded_node_start_index,
                            loaded_node_end_index,
                            node_front_start,
                            node_front_end,
                            block=(workgroupsize, 1, 1),
                            grid=(1, 1),
                            shared=4 *
                            (7 * max_nodes_can_store + 3 * workgroupsize + 1))
        cuda.Context.get_current().synchronize()
        end_queue = time.time()

        nactive = len(np.argwhere(queue_slot_flag.get() == 1))

        print "CheckNode Queue returns. ", end_queue - start_queue, " seconds"
        print "(Current node, To Test)"
        node_states = zip(photon_current_node.get(), photon_tested_node.get(),
                          photon_last_result.get())
        for x in xrange(0, len(node_states), 10):
            y = x + 10
            if y > len(node_states):
                y = len(node_states)
            print x, ": ", node_states[x:y]

        print "LAST RESULT:"
        np_photon_results = photon_last_result.get()
        for x in xrange(0, len(np_photon_results), 10):
            y = x + 10
            if y > len(np_photon_results):
                y = len(np_photon_results)
            print x, ": ", np_photon_results[x:y]

        print "PHOTON QUEUE"
        photon_queue = queue_photon_index.get()
        for x in xrange(0, len(photon_queue), 10):
            y = x + 10
            if y > len(photon_queue):
                y = len(photon_queue)
            print x, ": ", photon_queue[x:y]

        print "QUEUE SLOT FLAGS: ", nactive, " threads"
        slot_flags = queue_slot_flag.get()
        for x in xrange(0, len(slot_flags), 10):
            y = x + 10
            if y > len(slot_flags):
                y = len(slot_flags)
            print x, ": ", slot_flags[x:y]

        print "NODE FRONT: ", node_front_start.get(
        ), " to ", node_front_end.get(
        ), node_front_end.get() - node_front_start.get()