Example #1
class GPUDaq(object):
    def __init__(self, gpu_detector, ndaq=1, cl_context=None, cl_queue=None):
        if api.is_gpu_api_cuda():
            self.earliest_time_gpu = ga.empty(gpu_detector.nchannels * ndaq,
                                              dtype=np.float32)
            self.earliest_time_int_gpu = ga.empty(gpu_detector.nchannels *
                                                  ndaq,
                                                  dtype=np.uint32)
            self.channel_history_gpu = ga.zeros_like(
                self.earliest_time_int_gpu)
            self.channel_q_int_gpu = ga.zeros_like(self.earliest_time_int_gpu)
            self.channel_q_gpu = ga.zeros(len(self.earliest_time_int_gpu),
                                          dtype=np.float32)
            self.detector_gpu = gpu_detector.detector_gpu
            self.module = cutools.get_cu_module('daq.cu',
                                                options=api_options,
                                                include_source_directory=True)
        elif api.is_gpu_api_opencl():
            self.earliest_time_gpu = ga.empty(cl_queue,
                                              gpu_detector.nchannels * ndaq,
                                              dtype=np.float32)
            self.earliest_time_int_gpu = ga.empty(cl_queue,
                                                  gpu_detector.nchannels *
                                                  ndaq,
                                                  dtype=np.uint32)
            self.channel_history_gpu = ga.zeros(cl_queue,
                                                gpu_detector.nchannels * ndaq,
                                                dtype=np.uint32)
            self.channel_q_int_gpu = ga.zeros(cl_queue,
                                              gpu_detector.nchannels * ndaq,
                                              dtype=np.uint32)
            self.channel_q_gpu = ga.zeros(cl_queue,
                                          gpu_detector.nchannels * ndaq,
                                          dtype=np.float32)
            self.detector_gpu = gpu_detector  # struct not made in opencl mode, so we keep a copy of the class
            self.module = cltools.get_cl_module('daq.cl',
                                                cl_context,
                                                options=api_options,
                                                include_source_directory=True)
        else:
            raise RuntimeError("GPU API is neither CUDA nor OpenCL")

        self.solid_id_map_gpu = gpu_detector.solid_id_map
        self.solid_id_to_channel_index_gpu = gpu_detector.solid_id_to_channel_index_gpu
        self.gpu_funcs = GPUFuncs(self.module)
        self.ndaq = ndaq
        self.stride = gpu_detector.nchannels

    def begin_acquire(self, nthreads_per_block=64, cl_context=None):
        if api.is_gpu_api_cuda():
            self.gpu_funcs.reset_earliest_time_int(
                np.float32(1e9),
                np.int32(len(self.earliest_time_int_gpu)),
                self.earliest_time_int_gpu,
                block=(nthreads_per_block, 1, 1),
                grid=(len(self.earliest_time_int_gpu) // nthreads_per_block +
                      1, 1))
            self.channel_q_int_gpu.fill(0)
            self.channel_q_gpu.fill(0)
            self.channel_history_gpu.fill(0)
        elif api.is_gpu_api_opencl():
            comqueue = cl.CommandQueue(cl_context)
            self.gpu_funcs.reset_earliest_time_int(
                comqueue, (nthreads_per_block, 1, 1),
                (len(self.earliest_time_int_gpu) // nthreads_per_block + 1, 1),
                np.float32(1e9),
                np.int32(len(self.earliest_time_int_gpu)),
                self.earliest_time_int_gpu.data,
                g_times_l=True).wait()
            self.channel_q_int_gpu.fill(0, queue=comqueue)
            self.channel_q_gpu.fill(0, queue=comqueue)
            self.channel_history_gpu.fill(0, queue=comqueue)
            cl.enqueue_barrier(comqueue)

    def acquire(self,
                gpuphotons,
                rng_states,
                nthreads_per_block=64,
                max_blocks=1024,
                start_photon=None,
                nphotons=None,
                weight=1.0,
                cl_context=None):
        if start_photon is None:
            start_photon = 0
        if nphotons is None:
            nphotons = len(gpuphotons.pos) - start_photon

        if api.is_gpu_api_opencl():
            comqueue = cl.CommandQueue(cl_context)
            clmaxblocks = max_blocks

        if self.ndaq == 1:
            for first_photon, photons_this_round, blocks in \
                    chunk_iterator(nphotons, nthreads_per_block, max_blocks):
                if api.is_gpu_api_cuda():
                    self.gpu_funcs.run_daq(rng_states,
                                           np.uint32(0x1 << 2),
                                           np.int32(start_photon +
                                                    first_photon),
                                           np.int32(photons_this_round),
                                           gpuphotons.t,
                                           gpuphotons.flags,
                                           gpuphotons.last_hit_triangles,
                                           gpuphotons.weights,
                                           self.solid_id_map_gpu,
                                           self.detector_gpu,
                                           self.earliest_time_int_gpu,
                                           self.channel_q_int_gpu,
                                           self.channel_history_gpu,
                                           np.float32(weight),
                                           block=(nthreads_per_block, 1, 1),
                                           grid=(blocks, 1))
                elif api.is_gpu_api_opencl():
                    #print "daq: ",start_photon,first_photon,start_photon+first_photon,(photons_this_round/nthreads_per_block,1,1), (nthreads_per_block,1,1)
                    self.gpu_funcs.run_daq(
                        comqueue,
                        (photons_this_round // nthreads_per_block, 1, 1),
                        (nthreads_per_block, 1, 1),
                        rng_states.data,
                        np.uint32(0x1 << 2),
                        np.int32(start_photon + first_photon),
                        np.int32(photons_this_round),
                        gpuphotons.t.data,
                        gpuphotons.flags.data,
                        gpuphotons.last_hit_triangles.data,
                        gpuphotons.weights.data,
                        self.solid_id_map_gpu.data,
                        # -- Detector struct --
                        self.solid_id_to_channel_index_gpu.data,
                        self.detector_gpu.time_cdf_x_gpu.data,
                        self.detector_gpu.time_cdf_y_gpu.data,
                        self.detector_gpu.charge_cdf_x_gpu.data,
                        self.detector_gpu.charge_cdf_y_gpu.data,
                        self.detector_gpu.nchannels,
                        self.detector_gpu.time_cdf_len,
                        self.detector_gpu.charge_cdf_len,
                        self.detector_gpu.charge_unit,
                        # ---------------------
                        self.earliest_time_int_gpu.data,
                        self.channel_q_int_gpu.data,
                        self.channel_history_gpu.data,
                        np.float32(weight),
                        g_times_l=True).wait()

        else:
            for first_photon, photons_this_round, blocks in \
                    chunk_iterator(nphotons, 1, max_blocks):
                if api.is_gpu_api_cuda():
                    self.gpu_funcs.run_daq_many(
                        rng_states,
                        np.uint32(0x1 << 2),
                        np.int32(start_photon + first_photon),
                        np.int32(photons_this_round),
                        gpuphotons.t,
                        gpuphotons.flags,
                        gpuphotons.last_hit_triangles,
                        gpuphotons.weights,
                        self.solid_id_map_gpu,
                        self.detector_gpu,
                        self.earliest_time_int_gpu,
                        self.channel_q_int_gpu,
                        self.channel_history_gpu,
                        np.int32(self.ndaq),
                        np.int32(self.stride),
                        np.float32(weight),
                        block=(nthreads_per_block, 1, 1),
                        grid=(blocks, 1))
                elif api.is_gpu_api_opencl():
                    self.gpu_funcs.run_daq_many(
                        comqueue,
                        (nthreads_per_block, 1, 1),
                        (blocks, 1),
                        np.int32(start_photon + first_photon),
                        np.int32(photons_this_round),
                        gpuphotons.t.data,
                        gpuphotons.flags.data,
                        gpuphotons.last_hit_triangles.data,
                        gpuphotons.weights.data,
                        self.solid_id_map_gpu,
                        # -- Detector Struct --
                        self.solid_id_to_channel_index_gpu.data,
                        self.detector_gpu.time_cdf_x_gpu.data,
                        self.detector_gpu.time_cdf_y_gpu.data,
                        self.detector_gpu.charge_cdf_x_gpu.data,
                        self.detector_gpu.charge_cdf_y_gpu.data,
                        self.detector_gpu.nchannels,
                        self.detector_gpu.time_cdf_len,
                        self.detector_gpu.charge_cdf_len,
                        self.detector_gpu.charge_unit,
                        # ---------------------
                        self.earliest_time_int_gpu.data,
                        self.channel_q_int_gpu.data,
                        self.channel_history_gpu.data,
                        np.int32(self.ndaq),
                        np.int32(self.stride),
                        np.float32(weight),
                        g_times_l=True).wait()
        if api.is_gpu_api_cuda():
            cuda.Context.get_current().synchronize()
        elif api.is_gpu_api_opencl():
            cl.enqueue_barrier(comqueue)

    def end_acquire(self, nthreads_per_block=64, cl_context=None):
        if api.is_gpu_api_cuda():
            self.gpu_funcs.convert_sortable_int_to_float(
                np.int32(len(self.earliest_time_int_gpu)),
                self.earliest_time_int_gpu,
                self.earliest_time_gpu,
                block=(nthreads_per_block, 1, 1),
                grid=(len(self.earliest_time_int_gpu) // nthreads_per_block +
                      1, 1))
            self.gpu_funcs.convert_charge_int_to_float(
                self.detector_gpu,
                self.channel_q_int_gpu,
                self.channel_q_gpu,
                block=(nthreads_per_block, 1, 1),
                grid=(len(self.channel_q_int_gpu) // nthreads_per_block + 1,
                      1))
            cuda.Context.get_current().synchronize()
        elif api.is_gpu_api_opencl():
            print(cl_context, nthreads_per_block)
            comqueue = cl.CommandQueue(cl_context)
            self.gpu_funcs.convert_sortable_int_to_float(
                comqueue, (len(self.earliest_time_int_gpu), 1, 1),
                (nthreads_per_block, 1, 1),
                np.int32(len(self.earliest_time_int_gpu)),
                self.earliest_time_int_gpu.data,
                self.earliest_time_gpu.data,
                g_times_l=True).wait()
            self.gpu_funcs.convert_charge_int_to_float(
                comqueue, (len(self.channel_q_int_gpu), 1, 1),
                (nthreads_per_block, 1, 1),
                self.detector_gpu.nchannels,
                self.detector_gpu.charge_unit,
                self.channel_q_int_gpu.data,
                self.channel_q_gpu.data,
                g_times_l=True).wait()

        return GPUChannels(self.earliest_time_gpu, self.channel_q_gpu,
                           self.channel_history_gpu, self.ndaq, self.stride)
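
A minimal usage sketch of the GPUDaq lifecycle above, assuming the CUDA path (the OpenCL path additionally passes cl_context to each call); gpu_detector, gpuphotons and rng_states are placeholders for objects built elsewhere by the surrounding framework:

# Hypothetical driver code, not part of the class above.
daq = GPUDaq(gpu_detector, ndaq=1)
daq.begin_acquire(nthreads_per_block=64)
daq.acquire(gpuphotons, rng_states, nthreads_per_block=64, max_blocks=1024)
channels = daq.end_acquire(nthreads_per_block=64)
# channels is a GPUChannels instance holding the per-channel earliest hit
# times, charges, and hit-history flags accumulated by the kernels above.
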
Example #2
class GPUDaqLAr1ND(GPUDAQHist):
    """ DAQ that stores histogram of photon hits."""
    NTDC = None
    NS_PER_TDC = None

    def __init__(self,
                 gpu_detector,
                 ntdcs=None,
                 ns_per_tdc=None,
                 adc_bits=None,
                 ndaq=1,
                 cl_context=None,
                 cl_queue=None):
        """constructor.
        
        Args:
          gpu_detector: GPUDetector
        Keywords:
          ntdcs: int
            number of time bins per channel
            if not supplied, using class variable value
          ns_per_tdc: float
            nanoseconds per time bin
            if not supplied, using class variable value
          adc_bits:  int
            number of ADC bits (not used yet)
          ndaq: int
            number of daqs
          cl_context: pyopencl.Context
          cl_queue: pyopencl.CommandQueue
        Raises:
          ValueError when ntdcs and ns_per_tdc are found to be NoneType
        """
        if ntdcs is None:
            self.ntdcs = GPUDaqLAr1ND.NTDC
        else:
            self.ntdcs = ntdcs
        if ns_per_tdc is None:
            self.ns_per_tdc = GPUDaqLAr1ND.NS_PER_TDC
        else:
            self.ns_per_tdc = ns_per_tdc
        super(GPUDaqLAr1ND, self).__init__(gpu_detector,
                                           ntdcs=self.ntdcs,
                                           ns_per_tdc=self.ns_per_tdc,
                                           adc_bits=adc_bits,
                                           ndaq=ndaq,
                                           cl_context=cl_context,
                                           cl_queue=cl_queue)
        if self.ntdcs is None:
            raise ValueError("GPUDaqLAr1ND.NTDC has not been set.")
        if self.ns_per_tdc is None:
            raise ValueError("GPUDaqLAr1ND.NS_PER_TDC has not been set.")

        kernel_filepath = os.path.dirname(
            os.path.realpath(__file__)) + "/daq_lar1nd"
        if api.is_gpu_api_cuda():
            self.module = cutools.get_cu_module(kernel_filepath + ".cu",
                                                options=api_options,
                                                include_source_directory=True)
        elif api.is_gpu_api_opencl():
            self.module = cltools.get_cl_module(kernel_filepath + '.cl',
                                                cl_context,
                                                options=api_options,
                                                include_source_directory=True)
        else:
            raise RuntimeError("GPU API is neither CUDA nor OpenCL")

        self.gpu_funcs = GPUFuncs(self.module)

    def acquire(self,
                gpuphotons,
                rng_states,
                nthreads_per_block=64,
                max_blocks=1024,
                start_photon=None,
                nphotons=None,
                weight=1.0,
                cl_context=None):
        """run UBooNE DAQ acquire kernels"""
        if start_photon is None:
            start_photon = 0
        if nphotons is None:
            nphotons = len(gpuphotons.pos) - start_photon

        if api.is_gpu_api_opencl():
            comqueue = cl.CommandQueue(cl_context)
            clmaxblocks = max_blocks

        # We loop over all photons and bin them essentially
        if self.ndaq == 1:
            for first_photon, photons_this_round, blocks in \
                    chunk_iterator(nphotons, nthreads_per_block, max_blocks):
                if api.is_gpu_api_cuda():
                    self.gpu_funcs.run_daq(rng_states,
                                           np.uint32(event.SURFACE_DETECT),
                                           np.int32(start_photon +
                                                    first_photon),
                                           np.int32(photons_this_round),
                                           gpuphotons.t,
                                           gpuphotons.flags,
                                           gpuphotons.last_hit_triangles,
                                           gpuphotons.weights,
                                           self.solid_id_map_gpu,
                                           self.detector_gpu,
                                           self.adc_gpu,
                                           np.int32(self.nchannels),
                                           np.int32(self.ntdcs),
                                           np.float32(self.ns_per_tdc),
                                           np.float32(100.0),
                                           self.channel_history_gpu,
                                           np.float32(weight),
                                           block=(nthreads_per_block, 1, 1),
                                           grid=(blocks, 1))
                elif api.is_gpu_api_opencl():
                    self.gpu_funcs.run_daq(
                        comqueue,
                        (photons_this_round, 1, 1),
                        None,
                        rng_states.data,
                        np.uint32(0x1 << 2),
                        np.int32(start_photon + first_photon),
                        np.int32(nphotons),
                        gpuphotons.t.data,
                        gpuphotons.pos.data,
                        gpuphotons.flags.data,
                        gpuphotons.last_hit_triangles.data,
                        gpuphotons.weights.data,
                        self.solid_id_map_gpu.data,
                        # -- Detector struct --
                        self.solid_id_to_channel_index_gpu.data,
                        # ---------------------
                        self.uint_adc_gpu.data,
                        np.int32(self.nchannels),
                        np.int32(self.ntdcs),
                        np.float32(self.ns_per_tdc),
                        np.float32(100.0),
                        self.channel_history_gpu.data,
                        # -- Channel transforms --
                        self.channel_inverse_rot_gpu.data,
                        self.channel_inverse_trans_gpu.data,
                        # ------------------------
                        np.float32(weight),
                        g_times_l=False).wait()
            # if opencl, need to convert ADC from uint to float
            if api.is_gpu_api_opencl():
                self.gpu_funcs.convert_adc(comqueue,
                                           (int(self.nchannels), 1, 1),
                                           None,
                                           self.uint_adc_gpu.data,
                                           self.adc_gpu.data,
                                           np.int32(self.nchannels),
                                           np.int32(self.ntdcs),
                                           g_times_l=False).wait()

        else:
            raise RuntimeError("Multi-DAQ not built")
            # NOTE: the loop below is unreachable until multi-DAQ support is
            # implemented; it mirrors the multi-DAQ branch of GPUDaq.acquire.
            for first_photon, photons_this_round, blocks in \
                    chunk_iterator(nphotons, 1, max_blocks):
                if api.is_gpu_api_cuda():
                    self.gpu_funcs.run_daq_many(
                        rng_states,
                        np.uint32(0x1 << 2),
                        np.int32(start_photon + first_photon),
                        np.int32(photons_this_round),
                        gpuphotons.t,
                        gpuphotons.flags,
                        gpuphotons.last_hit_triangles,
                        gpuphotons.weights,
                        self.solid_id_map_gpu,
                        self.detector_gpu,
                        self.earliest_time_int_gpu,
                        self.channel_q_int_gpu,
                        self.channel_history_gpu,
                        np.int32(self.ndaq),
                        np.int32(self.stride),
                        np.float32(weight),
                        block=(nthreads_per_block, 1, 1),
                        grid=(blocks, 1))
                elif api.is_gpu_api_opencl():
                    self.gpu_funcs.run_daq_many(
                        comqueue,
                        (nthreads_per_block, 1, 1),
                        (blocks, 1),
                        np.int32(start_photon + first_photon),
                        np.int32(photons_this_round),
                        gpuphotons.t.data,
                        gpuphotons.flags.data,
                        gpuphotons.last_hit_triangles.data,
                        gpuphotons.weights.data,
                        self.solid_id_map_gpu,
                        # -- Detector Struct --
                        self.solid_id_to_channel_index_gpu.data,
                        self.detector_gpu.time_cdf_x_gpu.data,
                        self.detector_gpu.time_cdf_y_gpu.data,
                        self.detector_gpu.charge_cdf_x_gpu.data,
                        self.detector_gpu.charge_cdf_y_gpu.data,
                        self.detector_gpu.nchannels,
                        self.detector_gpu.time_cdf_len,
                        self.detector_gpu.charge_cdf_len,
                        self.detector_gpu.charge_unit,
                        # ---------------------
                        self.earliest_time_int_gpu.data,
                        self.channel_q_int_gpu.data,
                        self.channel_history_gpu.data,
                        np.int32(self.ndaq),
                        np.int32(self.stride),
                        np.float32(weight),
                        g_times_l=True).wait()
        if api.is_gpu_api_cuda():
            cuda.Context.get_current().synchronize()
        elif api.is_gpu_api_opencl():
            cl.enqueue_barrier(comqueue)

    def end_acquire(self, nthreads_per_block=64, cl_context=None):
        """collect daq info and make GPUChannels instance.
        
        Args:
          nthreads_per_block: int
          cl_context: pyopenc.Context
        Returns:
          GPUChannels
        """
        if api.is_gpu_api_cuda():
            self.earliest_time_gpu = ga.zeros(self.nchannels, dtype=np.float32)
            nblocks = int(self.nchannels / nthreads_per_block) + 1
            # NOTE: nblocks is computed but not used below; the kernel is
            # launched with a fixed 1000-thread block and a single grid block.
            self.gpu_funcs.get_earliest_hit_time(np.int32(self.nchannels),
                                                 np.int32(self.ntdcs),
                                                 np.float32(self.ns_per_tdc),
                                                 self.adc_gpu,
                                                 self.channel_history_gpu,
                                                 self.earliest_time_gpu,
                                                 block=(1000, 1, 1),
                                                 grid=(1, 1))
            self.adc_gpu.get()
        elif api.is_gpu_api_opencl():
            comqueue = cl.CommandQueue(cl_context)
            self.earliest_time_gpu = ga.zeros(comqueue,
                                              self.nchannels,
                                              dtype=np.float32)
            self.gpu_funcs.get_earliest_hit_time(
                comqueue, (int(self.nchannels), 1, 1), None,
                np.int32(self.nchannels), np.int32(self.ntdcs),
                np.float32(self.ns_per_tdc), self.adc_gpu.data,
                self.channel_history_gpu.data,
                self.earliest_time_gpu.data).wait()
            self.adc_gpu.get()

        return GPUChannels(self.earliest_time_gpu, self.adc_gpu,
                           self.channel_history_gpu, self.ndaq, self.stride)

    @classmethod
    def build_daq(cls, gpu_geometry, cl_context=None, cl_queue=None):
        """factory method.

        will be called by chroma.Simulation to build DAQ instance.
        Returns:
          GPUDaqLAr1ND instance
        """
        return GPUDaqLAr1ND(gpu_geometry,
                            cl_context=cl_context,
                            cl_queue=cl_queue)
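
A minimal configuration sketch for GPUDaqLAr1ND: the constructor falls back to the NTDC and NS_PER_TDC class attributes and raises ValueError if they are still None, so a caller that does not pass ntdcs/ns_per_tdc must set them first. The values and the gpu_geometry object below are illustrative assumptions only:

# Hypothetical setup; gpu_geometry is assumed to be an existing GPUDetector.
GPUDaqLAr1ND.NTDC = 1000        # example: number of time bins per channel
GPUDaqLAr1ND.NS_PER_TDC = 1.0   # example: nanoseconds per time bin
daq = GPUDaqLAr1ND.build_daq(gpu_geometry)  # factory used by chroma.Simulation
# then drive it with begin_acquire / acquire / end_acquire as in Example #1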