Example #1
    def test_2d_active(self):
        shape = self.shape2D
        known_data = np.random.normal(size=shape).astype(np.float32).view(
            np.complex64)
        idata = bf.ndarray(known_data, space='cuda_managed')
        odata = bf.empty_like(idata)
        coeffs = self.coeffs * 1.0
        coeffs.shape += (1, )
        coeffs = np.repeat(coeffs, idata.shape[1], axis=1)
        coeffs.shape = (coeffs.shape[0], idata.shape[1])
        coeffs = bf.ndarray(coeffs, space='cuda_managed')

        fir = Fir()
        fir.init(coeffs, 1)
        fir.execute(idata, odata)
        fir.execute(idata, odata)
        stream_synchronize()

        for i in range(known_data.shape[1]):
            zf = lfiltic(self.coeffs, 1.0, 0.0)
            known_result, zf = lfilter(self.coeffs,
                                       1.0,
                                       known_data[:, i],
                                       zi=zf)
            known_result, zf = lfilter(self.coeffs,
                                       1.0,
                                       known_data[:, i],
                                       zi=zf)
            compare(odata[:, i], known_result)
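
A minimal sketch of the pattern the test above relies on: work launched against 'cuda_managed' arrays must be synchronized before the host reads the result. The import paths for Fir and stream_synchronize, the float64 coefficient dtype, and the meaning of the second init() argument (a decimation factor of 1) are assumptions here, not something the snippet above states.

import numpy as np
import bifrost as bf
from bifrost.fir import Fir                    # assumed import path
from bifrost.device import stream_synchronize  # assumed import path

# One channel of 8-tap boxcar coefficients, shaped (ntaps, nchan) as above
coeffs = bf.ndarray(np.full((8, 1), 0.125), space='cuda_managed')

# View pairs of float32 samples as complex64, giving a (1000, 1) complex input
host_data = np.random.normal(size=(1000, 2)).astype(np.float32).view(np.complex64)
idata = bf.ndarray(host_data, space='cuda_managed')
odata = bf.empty_like(idata)

fir = Fir()
fir.init(coeffs, 1)       # 1 = assumed decimation factor, as passed above
fir.execute(idata, odata)
stream_synchronize()      # wait for the GPU before inspecting odata on the host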
Example #2
    def run_test_r2c_dtype(self,
                           shape,
                           axes,
                           dtype=np.float32,
                           scale=1.,
                           misalign=0):
        known_data = np.random.normal(size=shape).astype(np.float32)
        known_data = (known_data * scale).astype(dtype)

        # Force misaligned data
        padded_shape = shape[:-1] + (shape[-1] + misalign, )
        known_data = np.resize(known_data, padded_shape)
        idata = bf.ndarray(known_data, space='cuda_managed')
        known_data = known_data[..., misalign:]
        idata = idata[..., misalign:]

        oshape = list(shape)
        oshape[axes[-1]] = shape[axes[-1]] // 2 + 1
        odata = bf.ndarray(shape=oshape, dtype='cf32', space='cuda_managed')
        fft = Fft()
        fft.init(idata, odata, axes=axes)
        fft.execute(idata, odata)
        stream_synchronize()
        known_result = gold_rfftn(known_data.astype(np.float32) / scale,
                                  axes=axes)
        compare(odata, known_result)
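
A pared-down version of the same real-to-complex transform, compared against numpy's rfft instead of the gold_rfftn helper; the import paths for Fft and stream_synchronize and the comparison tolerances are assumptions.

import numpy as np
import bifrost as bf
from bifrost.fft import Fft                    # assumed import path
from bifrost.device import stream_synchronize  # assumed import path

known_data = np.random.normal(size=(4, 64)).astype(np.float32)
idata = bf.ndarray(known_data, space='cuda_managed')
# The r2c output along the transformed axis has length n // 2 + 1
odata = bf.ndarray(shape=(4, 64 // 2 + 1), dtype='cf32', space='cuda_managed')

fft = Fft()
fft.init(idata, odata, axes=[1])
fft.execute(idata, odata)
stream_synchronize()    # finish the transform before comparing on the host

np.testing.assert_allclose(odata, np.fft.rfft(known_data, axis=1),
                           rtol=1e-4, atol=1e-3)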
Example #3
    def run_unpack_to_ci8_test(self, iarray):
        oarray = bf.ndarray(shape=iarray.shape,
                            dtype='ci8',
                            space='cuda_managed')
        oarray_known = bf.ndarray([[(0, 1), (2, 3)],
                                   [(4, 5), (6, 7)],
                                   [(-8, -7), (-6, -5)]],
                                  dtype='ci8')
        bf.unpack(iarray.copy(space='cuda_managed'), oarray)
        stream_synchronize()
        np.testing.assert_equal(oarray, oarray_known)
Example #4
    def run_reduce_test(self, shape, axis, n, op='sum', dtype=np.float32):
        a = ((np.random.random(size=shape) * 2 - 1) * 127).astype(
            np.int8).astype(dtype)
        if op[:3] == 'pwr':
            b_gold = pwrscrunch(a.astype(np.float32), n, axis, NP_OPS[op[3:]])
        else:
            b_gold = scrunch(a.astype(np.float32), n, axis, NP_OPS[op])
        a = bf.asarray(a, space='cuda_managed')
        b = bf.empty_like(b_gold, space='cuda_managed')
        bf.reduce(a, b, op)
        stream_synchronize()
        np.testing.assert_allclose(b, b_gold)
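
A small standalone usage sketch of bf.reduce in the same spirit; that the reduction factor is inferred from the output shape, and the 'f32' dtype string, are assumptions drawn from the call pattern above rather than stated by it.

import numpy as np
import bifrost as bf
from bifrost.device import stream_synchronize  # assumed import path

a_host = np.arange(32, dtype=np.float32).reshape(4, 8)
a = bf.asarray(a_host, space='cuda_managed')
# Reduce the last axis by a factor of 4: (4, 8) -> (4, 2)
b = bf.ndarray(shape=(4, 2), dtype='f32', space='cuda_managed')

bf.reduce(a, b, 'sum')
stream_synchronize()    # wait for the reduction before checking the result

np.testing.assert_allclose(b, a_host.reshape(4, 2, 4).sum(axis=-1))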
Example #5
def copy_array(dst, src):
    dst_bf = asarray(dst)
    src_bf = asarray(src)
    if (space_accessible(dst_bf.bf.space, ['system'])
            and space_accessible(src_bf.bf.space, ['system'])):
        np.copyto(dst_bf, src_bf)
    else:
        _check(_bf.bfArrayCopy(dst_bf.as_BFarray(), src_bf.as_BFarray()))
        if dst_bf.bf.space != src_bf.bf.space:
            # TODO: Decide where/when these need to be called
            device.stream_synchronize()
    return dst
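
A short usage sketch for copy_array; the bifrost.ndarray import path and the plain 'cuda' device space (not shown elsewhere on this page) are assumptions. Because the source space is not system-accessible and the two spaces differ, the call takes the bfArrayCopy branch above and synchronizes before returning.

import numpy as np
import bifrost as bf
from bifrost.ndarray import copy_array    # assumed import path

src = bf.ndarray(np.arange(16, dtype=np.float32), space='cuda')    # assumed device space
dst = bf.ndarray(shape=(16,), dtype='f32', space='system')

copy_array(dst, src)    # different spaces: device-side copy, then stream_synchronize
np.testing.assert_equal(dst, np.arange(16, dtype=np.float32))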
Example #6
    def run_simple_test(self, x, funcstr, func):
        x_orig = x
        x = bf.asarray(x, 'cuda_managed')
        y = bf.empty_like(x)
        x.flags['WRITEABLE'] = False
        x.bf.immutable = True  # TODO: Is this actually doing anything? (flags is, just not sure about bf.immutable)
        for _ in range(3):
            bf.map(funcstr, {'x': x, 'y': y})
            stream_synchronize()
        if isinstance(x_orig, bf.ndarray):
            x_orig = x
        # Note: Using func(x) is dangerous because bf.ndarray does things like
        #         lazy .conj(), which break when used as if it were np.ndarray.
        np.testing.assert_equal(y, func(x_orig))
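
A minimal standalone bf.map call following the pattern above; the element-wise expression string 'y = x + 1' is an assumption about the expression syntax, and the import path for stream_synchronize is assumed.

import numpy as np
import bifrost as bf
from bifrost.device import stream_synchronize  # assumed import path

x = bf.asarray(np.arange(10, dtype=np.float32), space='cuda_managed')
y = bf.empty_like(x)

bf.map('y = x + 1', {'x': x, 'y': y})    # element-wise kernel over x and y
stream_synchronize()                     # let the kernel finish before checking y

np.testing.assert_equal(y, np.arange(10, dtype=np.float32) + 1)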
Example #7
    def copy(self, space=None, order='C'):
        if order != 'C':
            raise NotImplementedError('Only order="C" is supported')
        if space is None:
            space = self.bf.space
        if not self.flags['C_CONTIGUOUS']:
            # Deal with arrays that need to have their layouts changed
            # TODO: Is there a better way to handle this?
            if space_accessible(self.bf.space, ['system']):
                ## For arrays that can be accessed from the system space, use
                ## numpy.ndarray.copy() to do the heavy lifting
                if space == 'cuda_managed':
                    ## TODO: Decide where/when these need to be called
                    device.stream_synchronize()
                ## This actually makes two copies and throws one away
                temp = ndarray(shape=self.shape, dtype=self.dtype, space=self.bf.space)
                temp[...] = np.array(self).copy()
                if self.bf.space != space:
                    return ndarray(temp, space=space)
                return temp
            else:
                ## For arrays that can be accessed from CUDA, use bifrost.transpose
                ## to do the heavy lifting
                ### Figure out the correct axis order for C
                permute = np.argsort(self.strides)[::-1]
                c_shape = [self.shape[p] for p in permute]
                ### Make a BFarray wrapper for self so we can reset shape/strides
                ### to what they should be for a C ordered array
                self_corder = self.as_BFarray()
                shape_type = ctypes.c_long * _bf.BF_MAX_DIMS
                self_corder.shape = shape_type(*c_shape)
                self_corder.strides = shape_type(*[self.strides[p] for p in permute])
                ### Make a temporary array with the right shape that will be C ordered
                temp = ndarray(shape=self.shape, dtype=self.dtype, space=self.bf.space)
                ### Run the transpose using the BFarray wrapper and the temporary array
                array_type = ctypes.c_int * self.ndim
                axes_array = array_type(*permute)
                _check(_bf.bfTranspose(self_corder, temp.as_BFarray(), axes_array))
                if self.bf.space != space:
                    return ndarray(temp, space=space)
                return temp
        # Note: This makes an actual copy as long as space is not None
        return ndarray(self, space=space)
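
A brief usage sketch for the copy() method above, using only spaces that appear elsewhere on this page; for a C-contiguous array it falls through to the final ndarray(self, space=space) call.

import numpy as np
import bifrost as bf

a = bf.ndarray(np.arange(12, dtype=np.float32).reshape(3, 4), space='cuda_managed')
b = a.copy(space='system')    # same values, now backed by host memory
np.testing.assert_equal(b, np.arange(12, dtype=np.float32).reshape(3, 4))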
Example #8
    def main(self, orings):
        for sourcename in self.sourcenames:
            if self.shutdown_event.is_set():
                break
            with self.create_reader(sourcename) as ireader:
                oheaders = self.on_sequence(ireader, sourcename)
                for ohdr in oheaders:
                    if 'time_tag' not in ohdr:
                        ohdr['time_tag'] = self._seq_count
                    if 'name' not in ohdr:
                        ohdr['name'] = 'unnamed-sequence-%i' % self._seq_count
                self._seq_count += 1
                with ExitStack() as oseq_stack:
                    oseqs, ogulp_overlaps = self.begin_sequences(
                        oseq_stack, orings, oheaders,
                        igulp_nframes=[], istride_nframes=[])
                    while not self.shutdown_event.is_set():
                        prev_time = time.time()
                        with ExitStack() as ospan_stack:
                            ospans = self.reserve_spans(ospan_stack, oseqs)
                            cur_time = time.time()
                            reserve_time = cur_time - prev_time
                            prev_time = cur_time
                            ostrides_actual = self.on_data(ireader, ospans)
                            device.stream_synchronize()
                            self.commit_spans(ospans, ostrides_actual,
                                              ogulp_overlaps)
                            # TODO: Is this an OK way to detect end-of-data?
                            if any([ostride == 0 for ostride in ostrides_actual]):
                                break
                        cur_time = time.time()
                        process_time = cur_time - prev_time
                        prev_time = cur_time
                        self.perf_proclog.update({
                            'acquire_time': -1,
                            'reserve_time': reserve_time,
                            'process_time': process_time
                        })
Example #9
    def main(self, orings):
        for iseqs in izip(*[iring.read(guarantee=self.guarantee)
                            for iring in self.irings]):
            if self.shutdown_event.is_set():
                break
            for i, iseq in enumerate(iseqs):
                self.sequence_proclogs[i].update(iseq.header)
            oheaders = self._on_sequence(iseqs)
            for ohdr in oheaders:
                if 'time_tag' not in ohdr:
                    ohdr['time_tag'] = self._seq_count
            self._seq_count += 1

            igulp_nframes = [
                self.gulp_nframe or iseq.header['gulp_nframe']
                for iseq in iseqs
            ]
            igulp_overlaps = self._define_input_overlap_nframe(iseqs)
            istride_nframes = igulp_nframes[:]
            igulp_nframes = [
                igulp_nframe + nframe_overlap
                for igulp_nframe, nframe_overlap in zip(
                    igulp_nframes, igulp_overlaps)
            ]

            for iseq, igulp_nframe in zip(iseqs, igulp_nframes):
                if self.buffer_factor is None:
                    src_block = iseq.ring.owner
                    if src_block is not None and self.is_fused_with(src_block):
                        buffer_factor = 1
                    else:
                        buffer_factor = None
                else:
                    buffer_factor = self.buffer_factor
                iseq.resize(gulp_nframe=igulp_nframe,
                            buf_nframe=self.buffer_nframe,
                            buffer_factor=buffer_factor)

            # TODO: Ever need to specify starting offset?
            iframe0s = [0 for _ in igulp_nframes]

            force_skip = False

            with ExitStack() as oseq_stack:
                oseqs, ogulp_overlaps = self.begin_sequences(
                    oseq_stack, orings, oheaders, igulp_nframes,
                    istride_nframes)
                if self.shutdown_event.is_set():
                    break
                prev_time = time.time()
                for ispans in izip(*[
                        iseq.read(igulp_nframe, istride_nframe, iframe0)
                        for (iseq, igulp_nframe, istride_nframe, iframe0) in
                        zip(iseqs, igulp_nframes, istride_nframes, iframe0s)
                ]):
                    if self.shutdown_event.is_set():
                        return

                    if any([ispan.nframe_skipped for ispan in ispans]):
                        # There were skipped (overwritten) frames
                        with ExitStack() as ospan_stack:
                            iskip_slices = [
                                slice(iframe0, iframe0 + ispan.nframe_skipped,
                                      istride_nframe)
                                for iframe0, istride_nframe, ispan in zip(
                                    iframe0s, istride_nframes, ispans)
                            ]
                            iskip_nframes = [
                                ispan.nframe_skipped for ispan in ispans
                            ]
                            # ***TODO: Need to loop over multiple ospans here,
                            #            because iskip_nframes can be
                            #            arbitrarily large!
                            ospans = self.reserve_spans(
                                ospan_stack, oseqs, iskip_nframes)
                            ostrides_actual = self._on_skip(
                                iskip_slices, ospans)
                            device.stream_synchronize()
                            self.commit_spans(ospans, ostrides_actual,
                                              ogulp_overlaps)

                    if all([ispan.nframe == 0 for ispan in ispans]):
                        # No data to see here, move right along
                        continue

                    cur_time = time.time()
                    acquire_time = cur_time - prev_time
                    prev_time = cur_time

                    with ExitStack() as ospan_stack:
                        igulp_nframes = [ispan.nframe for ispan in ispans]
                        ospans = self.reserve_spans(ospan_stack, oseqs,
                                                    igulp_nframes)
                        cur_time = time.time()
                        reserve_time = cur_time - prev_time
                        prev_time = cur_time

                        if not force_skip:
                            # *TODO: See if can fuse together multiple on_data calls here before
                            #          calling stream_synchronize().
                            #        Consider passing .data instead of rings here
                            ostrides_actual = self._on_data(ispans, ospans)
                            device.stream_synchronize()

                        any_frames_overwritten = any(
                            [ispan.nframe_overwritten for ispan in ispans])
                        if force_skip or any_frames_overwritten:
                            # Note: To allow interrupted pipelines to catch up,
                            #         we force-skip an additional gulp whenever
                            #         a span is overwritten during on_data.
                            force_skip = any_frames_overwritten
                            iskip_slices = [
                                slice(
                                    ispan.frame_offset, ispan.frame_offset +
                                    ispan.nframe_overwritten, istride_nframe)
                                for ispan, istride_nframe in zip(
                                    ispans, istride_nframes)
                            ]
                            ostrides_actual = self._on_skip(
                                iskip_slices, ospans)
                            device.stream_synchronize()

                        self.commit_spans(ospans, ostrides_actual,
                                          ogulp_overlaps)
                    cur_time = time.time()
                    process_time = cur_time - prev_time
                    prev_time = cur_time
                    self.perf_proclog.update({
                        'acquire_time': acquire_time,
                        'reserve_time': reserve_time,
                        'process_time': process_time
                    })
            # **TODO: This will not be called if an exception is raised
            #           Need to call it from a context manager somehow
            self._on_sequence_end(iseqs)