示例#1
0
def propagate(gpu_detector,
              number=10,
              nphotons=500000,
              nthreads_per_block=64,
              max_blocks=1024):
    """Returns the average number of photons propagated on the GPU per second.

    Runs `number` passes. Each pass builds `nphotons` photons at the origin
    with sampled directions, polarizations and wavelengths, uploads them, and
    then times only the propagate call plus the context synchronize.

    The first pass is discarded as warm-up, so `number` must be at least 2
    for any run time to be recorded; otherwise the statistics below are
    computed on an empty list.

    The return value is `nphotons` divided by a ufloat built from the mean
    and standard deviation of the recorded run times.
    """
    rng_states = gpu.get_rng_states(nthreads_per_block * max_blocks)

    run_times = []
    for i in tools.progress(list(range(number))):
        # Photon generation and upload are deliberately outside the timed
        # region.
        pos = np.zeros((nphotons, 3))
        directions = sample.uniform_sphere(nphotons)
        # Apply the permutation returned by tools.argsort_direction before
        # deriving the polarizations from the (reordered) directions.
        reorder = tools.argsort_direction(directions)
        directions = directions[reorder]
        pol = normalize(np.cross(sample.uniform_sphere(nphotons), directions))
        wavelengths = np.random.uniform(400, 800, size=nphotons)
        photons = event.Photons(pos, directions, pol, wavelengths)
        gpu_photons = gpu.GPUPhotons(photons)

        # perf_counter is monotonic and high resolution, so the measured
        # interval cannot be skewed by system clock adjustments.
        t0 = time.perf_counter()
        gpu_photons.propagate(gpu_detector, rng_states, nthreads_per_block,
                              max_blocks)
        # Synchronize before reading the clock again.
        cuda.Context.get_current().synchronize()
        elapsed = time.perf_counter() - t0

        if i > 0:
            # first kernel call incurs some driver overhead
            run_times.append(elapsed)

    # NOTE(review): the single-tuple ufloat((nominal, std)) call looks like
    # the legacy `uncertainties` signature -- confirm the installed version
    # still accepts it.
    return nphotons / ufloat((np.mean(run_times), np.std(run_times)))
示例#2
0
def make_photon_with_arrays(size):
    '''Build a chroma.event.Photons object sized for `size` photons.

    Every attribute is backed by a freshly allocated, uninitialized
    (np.empty) array: pos, dir and pol are (size, 3) float32; wavelengths
    and t are float32; flags is uint32; last_hit_triangles is int32.'''

    def vec3():
        # One uninitialized 3-component float32 row per photon.
        return np.empty((size, 3), dtype=np.float32)

    def per_photon(dtype):
        # One uninitialized scalar per photon.
        return np.empty(size, dtype=dtype)

    return event.Photons(pos=vec3(),
                         dir=vec3(),
                         pol=vec3(),
                         wavelengths=per_photon(np.float32),
                         t=per_photon(np.float32),
                         flags=per_photon(np.uint32),
                         last_hit_triangles=per_photon(np.int32))
示例#3
0
 def get(self):
     """Fetch every photon array via `.get()` and wrap them in event.Photons.

     pos, dir and pol are reinterpreted as float32 and reshaped to one
     3-component row per element; the remaining arrays are passed through
     as returned by `.get()`.
     """

     def as_rows(gpu_vec):
         # Reinterpret the fetched buffer as float32 and lay it out as
         # (len(gpu_vec), 3).
         return gpu_vec.get().view(np.float32).reshape((len(gpu_vec), 3))

     # Arguments are evaluated left to right, so the arrays are fetched in
     # the same order as they are passed.
     return event.Photons(as_rows(self.pos),
                          as_rows(self.dir),
                          as_rows(self.pol),
                          self.wavelengths.get(),
                          self.t.get(),
                          self.last_hit_triangles.get(),
                          self.flags.get(),
                          self.weights.get())
示例#4
0
 def get(self):
     """Fetch every photon array via `.get()` and wrap them in event.Photons.

     pos, dir and pol are reinterpreted as float32 and reshaped to one row
     per element. Rows have 3 columns, or 4 when api.is_gpu_api_opencl()
     is true because the padding component must be included.
     """
     # must include padding under OpenCL
     ncols = 4 if api.is_gpu_api_opencl() else 3

     def as_rows(gpu_vec):
         # Reinterpret the fetched buffer as float32, `ncols` per element.
         return gpu_vec.get().view(np.float32).reshape((len(gpu_vec), ncols))

     # Arguments are evaluated left to right, so the arrays are fetched in
     # the same order as they are passed.
     return event.Photons(as_rows(self.pos),
                          as_rows(self.dir),
                          as_rows(self.pol),
                          self.wavelengths.get(),
                          self.t.get(),
                          self.last_hit_triangles.get(),
                          self.flags.get(),
                          self.weights.get())
示例#5
0
    def get(self, npl=0, hit=0):
        """Fetch the photon arrays and return them in one of two layouts.

        With `npl` falsy (the old way) the arrays are wrapped in an
        event.Photons. With `npl` truthy they are packed into an
        (N, 4, 4) float32 array that is returned viewed as NPY:

            row 0: pos (3 columns), t
            row 1: dir (3 columns), wavelength
            row 2: pol (3 columns), weight
            row 3: photon_id, 0, history flags, channel_id (PmtId)

        The row 3 integer fields are stored by reinterpreting their bits as
        float32 (`.view`), not by numeric conversion. When `hit` is also
        truthy only the entries whose channel_id is greater than zero are
        returned.
        """
        log.info("get npl:%d hit:%d " % (npl, hit))

        def as_rows(gpu_vec):
            return gpu_vec.get().view(np.float32).reshape((len(gpu_vec), 3))

        pos = as_rows(self.pos)
        direction = as_rows(self.dir)
        pol = as_rows(self.pol)
        wavelengths = self.wavelengths.get()
        t = self.t.get()
        last_hit_triangles = self.last_hit_triangles.get()
        flags = self.flags.get()
        weights = self.weights.get()

        if not npl:
            # the old way
            return event.Photons(pos, direction, pol, wavelengths, t,
                                 last_hit_triangles, flags, weights)

        count = len(pos)
        packed = np.zeros((count, 4, 4), dtype=np.float32)

        # Rows 0-2: a 3-vector plus one scalar in the fourth column.
        for row, (vec, scalar) in enumerate(((pos, t),
                                             (direction, wavelengths),
                                             (pol, weights))):
            packed[:, row, :3] = vec
            packed[:, row, 3] = scalar

        assert len(last_hit_triangles) == len(flags)

        # a kludge setting of pmtid into lht using the map argument of
        # propagate_hit.cu
        SURFACE_DETECT = 0x1 << 2
        detected = (flags & SURFACE_DETECT) != 0
        # sparsely populate, leaving zeros for undetected
        pmtid = np.zeros(count, dtype=np.int32)
        pmtid[detected] = last_hit_triangles[detected]

        photon_id = np.arange(count, dtype=np.int32)
        packed[:, 3, 0] = photon_id.view(packed.dtype)  # photon_id
        packed[:, 3, 1] = 0  # used in comparison against vbo prop
        packed[:, 3, 2] = flags.view(packed.dtype)  # history flags
        packed[:, 3, 3] = pmtid.view(packed.dtype)  # channel_id ie PmtId

        selected = packed[pmtid > 0] if hit else packed
        return selected.view(NPY)
示例#6
0
def load_photons(number=100, nphotons=500000):
    """Returns the average number of photons moved to the GPU device memory
    per second.

    One set of `nphotons` photons is built once on the host; each of the
    `number` passes then times constructing a gpu.GPUPhotons from it plus
    the context synchronize.

    The first pass is discarded as warm-up, so `number` must be at least 2
    for any run time to be recorded; otherwise the statistics below are
    computed on an empty list.

    The return value is `nphotons` divided by a ufloat built from the mean
    and standard deviation of the recorded run times.
    """
    pos = np.zeros((nphotons, 3))
    directions = sample.uniform_sphere(nphotons)
    pol = normalize(np.cross(sample.uniform_sphere(nphotons), directions))
    wavelengths = np.random.uniform(400, 800, size=nphotons)
    photons = event.Photons(pos, directions, pol, wavelengths)

    run_times = []
    for i in tools.progress(list(range(number))):
        # perf_counter is monotonic and high resolution, so the measured
        # interval cannot be skewed by system clock adjustments.
        t0 = time.perf_counter()
        # The binding keeps the previous upload referenced until the next
        # pass replaces it, exactly as before.
        gpu_photons = gpu.GPUPhotons(photons)
        # Synchronize before reading the clock again.
        cuda.Context.get_current().synchronize()
        elapsed = time.perf_counter() - t0

        if i > 0:
            # first kernel call incurs some driver overhead
            run_times.append(elapsed)

    # NOTE(review): the single-tuple ufloat((nominal, std)) call looks like
    # the legacy `uncertainties` signature -- confirm the installed version
    # still accepts it.
    return nphotons / ufloat((np.mean(run_times), np.std(run_times)))
示例#7
0
    def get_flat_hits(self,
                      gpu_detector,
                      target_flag=(0x1 << 2),
                      nthreads_per_block=64,
                      max_blocks=1024,
                      start_photon=None,
                      nphotons=None,
                      no_map=False):
        '''Return an event.Photons containing only photons that have a
        particular bit set in their history word and were detected by a
        channel.

        The selection runs in two kernel passes: count_photon_hits counts
        the matching photons so the output buffers can be sized, then
        copy_photon_hits copies them into the new buffers.

        Parameters:
            gpu_detector: provides `solid_id_map` and `detector_gpu`, which
                are passed to both kernels.
            target_flag: history bit to select on (default 0x1 << 2).
            nthreads_per_block, max_blocks: launch configuration handed to
                chunk_iterator and used for the block/grid sizes.
            start_photon: index of the first photon to consider; None means
                0.
            nphotons: number of photons to consider; None means every
                photon from `start_photon` to the end of `self.pos`.
            no_map: accepted but not used by this method.

        Returns:
            event.Photons built from pos, dir, pol (each reshaped to
            (n, 3) float32), wavelengths, t, last_hit_triangles, flags,
            weights, evidx and channels of the selected photons.
        '''
        context = cuda.Context.get_current()
        # NOTE(review): the counter allocation is bracketed by two
        # synchronize calls in the original; kept as-is since the reason is
        # not visible here.
        context.synchronize()
        index_counter_gpu = ga.zeros(shape=1, dtype=np.uint32)
        cuda.Context.get_current().synchronize()
        if start_photon is None:
            start_photon = 0
        if nphotons is None:
            nphotons = self.pos.size - start_photon

        # First count how much space we need
        for first_photon, photons_this_round, blocks in chunk_iterator(
                nphotons, nthreads_per_block, max_blocks):
            self.gpu_funcs.count_photon_hits(np.int32(start_photon +
                                                      first_photon),
                                             np.int32(photons_this_round),
                                             np.uint32(target_flag),
                                             self.flags,
                                             gpu_detector.solid_id_map,
                                             self.last_hit_triangles,
                                             gpu_detector.detector_gpu,
                                             index_counter_gpu,
                                             block=(nthreads_per_block, 1, 1),
                                             grid=(blocks, 1))
        cuda.Context.get_current().synchronize()
        reduced_nphotons = int(index_counter_gpu.get()[0])

        # Then allocate new storage space
        pos_gpu = ga.empty(shape=reduced_nphotons, dtype=ga.vec.float3)
        dir_gpu = ga.empty(shape=reduced_nphotons, dtype=ga.vec.float3)
        pol_gpu = ga.empty(shape=reduced_nphotons, dtype=ga.vec.float3)
        wavelengths_gpu = ga.empty(shape=reduced_nphotons, dtype=np.float32)
        t_gpu = ga.empty(shape=reduced_nphotons, dtype=np.float32)
        last_hit_triangles_gpu = ga.empty(shape=reduced_nphotons,
                                          dtype=np.int32)
        flags_gpu = ga.empty(shape=reduced_nphotons, dtype=np.uint32)
        weights_gpu = ga.empty(shape=reduced_nphotons, dtype=np.float32)
        evidx_gpu = ga.empty(shape=reduced_nphotons, dtype=np.uint32)
        channels_gpu = ga.empty(shape=reduced_nphotons, dtype=np.int32)

        # And finally copy hits, if there are any
        if reduced_nphotons > 0:
            # The counter is reused by the copy kernel, so reset it first.
            index_counter_gpu.fill(0)
            for first_photon, photons_this_round, blocks in \
                    chunk_iterator(nphotons, nthreads_per_block, max_blocks):
                # Argument order must match the kernel signature: inputs
                # first, then the output buffers in the same field order.
                self.gpu_funcs.copy_photon_hits(
                    np.int32(start_photon + first_photon),
                    np.int32(photons_this_round),
                    np.uint32(target_flag),
                    gpu_detector.solid_id_map,
                    gpu_detector.detector_gpu,
                    index_counter_gpu,
                    self.pos,
                    self.dir,
                    self.wavelengths,
                    self.pol,
                    self.t,
                    self.flags,
                    self.last_hit_triangles,
                    self.weights,
                    self.evidx,
                    pos_gpu,
                    dir_gpu,
                    wavelengths_gpu,
                    pol_gpu,
                    t_gpu,
                    flags_gpu,
                    last_hit_triangles_gpu,
                    weights_gpu,
                    evidx_gpu,
                    channels_gpu,
                    block=(nthreads_per_block, 1, 1),
                    grid=(blocks, 1))
            # Both passes must agree on the number of selected photons.
            assert index_counter_gpu.get()[0] == reduced_nphotons

        # float3 buffers come back as structured elements; reinterpret them
        # as float32 and lay them out one 3-component row per photon.
        pos = pos_gpu.get().view(np.float32).reshape((len(pos_gpu), 3))
        direction = dir_gpu.get().view(np.float32).reshape((len(dir_gpu), 3))
        pol = pol_gpu.get().view(np.float32).reshape((len(pol_gpu), 3))
        wavelengths = wavelengths_gpu.get()
        t = t_gpu.get()
        last_hit_triangles = last_hit_triangles_gpu.get()
        flags = flags_gpu.get()
        weights = weights_gpu.get()
        evidx = evidx_gpu.get()
        channels = channels_gpu.get()
        return event.Photons(pos, direction, pol, wavelengths, t,
                             last_hit_triangles, flags, weights, evidx,
                             channels)
示例#8
0
    def _simulate_batch(self,
                        batch_events,
                        keep_photons_beg=False,
                        keep_photons_end=False,
                        keep_hits=True,
                        run_daq=False,
                        max_steps=100,
                        verbose=False):
        '''Assumes batch_events is a list of Event objects with photons_beg having evidx set to the index in the array.

           Yields the fully formed events. Do not call directly.

           The photons_beg of all events are joined into one batch, copied
           to the GPU and propagated together; the results are then split
           back per event using the cumulative photon counts.

           Parameters:
               batch_events: events to simulate as one GPU batch.
               keep_photons_beg: when false, photons_beg is set to None on
                   each yielded event.
               keep_photons_end: when true, each event gets photons_end, its
                   slice of the propagated batch.
               keep_hits: when true and self.detector has `num_channels`,
                   each event gets a `hits` dict of its non-empty channels.
               run_daq: when true and self has `gpu_daq`, the DAQ is run per
                   event and stored in `channels`.
               max_steps: forwarded to the GPU propagate call.
               verbose: print the copy / propagate / batch timings.
        '''

        t_start = timer()

        #Idea: allocate memory on gpu and copy photons into it, instead of concatenating on CPU?
        batch_photons = event.Photons.join(
            [ev.photons_beg for ev in batch_events])
        # batch_bounds[k]:batch_bounds[k + 1] is the photon range of event k
        # inside the joined batch.
        batch_bounds = np.cumsum(
            np.concatenate([[0],
                            [len(ev.photons_beg) for ev in batch_events]]))

        #This copy to gpu has a _lot_ of overhead, want 100k photons at least, hence batches
        #Assume triangles, and weights are unimportant to copy to GPU
        t_copy_start = timer()
        gpu_photons = gpu.GPUPhotons(batch_photons,
                                     copy_triangles=False,
                                     copy_weights=False)
        t_copy_end = timer()
        if verbose:
            print('GPU copy took %0.2f s' % (t_copy_end - t_copy_start))

        t_prop_start = timer()
        tracking = gpu_photons.propagate(
            self.gpu_geometry,
            self.rng_states,
            nthreads_per_block=self.nthreads_per_block,
            max_blocks=self.max_blocks,
            max_steps=max_steps,
            track=self.photon_tracking)

        t_prop_end = timer()
        if verbose:
            print('GPU propagate took %0.2f s' % (t_prop_end - t_prop_start))

        # The batch time is taken here, before any results are read back.
        t_end = timer()
        if verbose:
            print('Batch took %0.2f s' % (t_end - t_start))

        if keep_photons_end:
            batch_photons_end = gpu_photons.get()

        # Evaluated once so the per-event branch below always agrees with
        # whether batch_hits was actually fetched.
        collect_hits = hasattr(self.detector, 'num_channels') and keep_hits
        if collect_hits:
            batch_hits = gpu_photons.get_hits(self.gpu_geometry)

        event_ranges = zip(batch_events, batch_bounds[:-1], batch_bounds[1:])
        for ev_index, (batch_ev, start_photon,
                       end_photon) in enumerate(event_ranges):

            if not keep_photons_beg:
                batch_ev.photons_beg = None

            if self.photon_tracking:
                step_photon_ids, step_photons = tracking
                nphotons = end_photon - start_photon
                photon_tracks = [[] for _ in range(nphotons)]
                for step_ids, step_batch in zip(step_photon_ids,
                                                step_photons):
                    mask = np.logical_and(step_ids >= start_photon,
                                          step_ids < end_photon)
                    if np.count_nonzero(mask) == 0:
                        # No photon of this event in this step: stop
                        # scanning the remaining steps.
                        break
                    local_ids = step_ids[mask] - start_photon
                    step_selected = step_batch[mask]
                    #Indexing Photons with a scalar changes the internal array shapes...
                    for k, local_id in enumerate(local_ids):
                        photon_tracks[local_id].append(step_selected[k])
                batch_ev.photon_tracks = [
                    event.Photons.join(track, concatenate=False)
                    if track else event.Photons()
                    for track in photon_tracks
                ]

            if keep_photons_end:
                batch_ev.photons_end = batch_photons_end[
                    start_photon:end_photon]

            if collect_hits:
                #Thought: this is kind of expensive computationally, but keep_hits is for diagnostics
                event_hits = {}
                for chan in batch_hits:
                    chan_hits = batch_hits[chan]
                    # Keep only the hits whose evidx is this event's index.
                    own_hits = chan_hits[chan_hits.evidx == ev_index]
                    if len(own_hits) > 0:
                        event_hits[chan] = own_hits
                batch_ev.hits = event_hits

            if hasattr(self, 'gpu_daq') and run_daq:
                #Must run DAQ per event, or design a much more complicated daq algorithm
                self.gpu_daq.begin_acquire()
                self.gpu_daq.acquire(
                    gpu_photons,
                    self.rng_states,
                    start_photon=start_photon,
                    nphotons=(end_photon - start_photon),
                    nthreads_per_block=self.nthreads_per_block,
                    max_blocks=self.max_blocks)
                gpu_channels = self.gpu_daq.end_acquire()
                batch_ev.channels = gpu_channels.get()

            yield batch_ev