示例#1
0
    def run_test_r2c_dtype(self,
                           shape,
                           axes,
                           dtype=np.float32,
                           scale=1.,
                           misalign=0):
        """Check a real-to-complex FFT of ``dtype`` input against the reference.

        Random data is multiplied by ``scale`` and cast to ``dtype`` before
        being transformed on the device. The reference result is computed by
        ``gold_rfftn`` from the same values cast back to float32 and divided
        by ``scale``.

        Args:
            shape: Shape of the real input. Must be a tuple, because it is
                concatenated with a tuple when the padded shape is built.
            axes: Axes to transform; the last listed axis is the one whose
                output length becomes ``n // 2 + 1``.
            dtype: Numpy dtype the input is cast to before upload.
            scale: Factor applied before the cast and divided out again when
                building the reference.
            misalign: Number of leading elements dropped from the last axis
                so that the transformed view does not start at the beginning
                of the buffer.

        Raises:
            AssertionError: If the device result and the reference differ by
                more than ``RTOL`` / ``ATOL``.
        """
        # Centre the uniform samples on zero before scaling and casting.
        centred = np.random.uniform(size=shape).astype(np.float32) * 2 - 1
        host_in = (centred * scale).astype(dtype)

        # Force misaligned data: widen the last axis by `misalign`, upload,
        # then view everything past the first `misalign` elements.
        widened_last = shape[-1] + misalign
        host_in = np.resize(host_in, shape[:-1] + (widened_last, ))
        dev_in = bf.ndarray(host_in, space='cuda')[..., misalign:]
        host_in = host_in[..., misalign:]

        # A real-to-complex transform keeps n // 2 + 1 bins on the last
        # transformed axis.
        halved_axis = axes[-1]
        out_shape = list(shape)
        out_shape[halved_axis] = shape[halved_axis] // 2 + 1
        dev_out = bf.ndarray(shape=out_shape, dtype='cf32', space='cuda')

        plan = Fft()
        plan.init(dev_in, dev_out, axes=axes)
        plan.execute(dev_in, dev_out)

        expected = gold_rfftn(host_in.astype(np.float32) / scale, axes=axes)
        np.testing.assert_allclose(dev_out.copy('system'),
                                   expected,
                                   rtol=RTOL,
                                   atol=ATOL)
示例#2
0
 def run_test_c2c_impl(self, shape, axes, inverse=False, fftshift=False):
     """Check a complex-to-complex FFT on the device against the reference.

     Args:
         shape: Shape of the complex input. The last dimension is doubled
             internally so that float32 pairs can be viewed as complex64.
         axes: Axes to transform.
         inverse: When true, run the inverse transform.
         fftshift: When true, ask the plan to apply an FFT shift and apply
             the matching shift to the reference.

     Raises:
         AssertionError: If the device result and the reference differ by
             more than ``RTOL`` / ``ATOL``.
     """
     shape = list(shape)
     shape[-1] *= 2  # For complex
     known_data = np.random.uniform(size=shape).astype(np.float32).view(
         np.complex64)
     idata = bf.ndarray(known_data, space='cuda')
     odata = bf.empty_like(idata)
     fft = Fft()
     fft.init(idata, odata, axes=axes, apply_fftshift=fftshift)
     fft.execute(idata, odata, inverse)
     if inverse:
         if fftshift:
             # For the inverse, the shift is undone on the input side.
             known_data = np.fft.ifftshift(known_data, axes=axes)
         # Note: Numpy applies normalization while CUFFT does not
         norm = reduce(lambda a, b: a * b,
                       [known_data.shape[d] for d in axes])
         known_result = gold_ifftn(known_data, axes=axes) * norm
     else:
         known_result = gold_fftn(known_data, axes=axes)
         if fftshift:
             known_result = np.fft.fftshift(known_result, axes=axes)
     # Bring the result back to the host once and compare; the previous
     # unused debug locals (an error mask and two aliases) are gone.
     result = odata.copy('system')
     np.testing.assert_allclose(result, known_result, RTOL, ATOL)
示例#3
0
    def run_test_r2c_dtype(self,
                           shape,
                           axes,
                           dtype=np.float32,
                           scale=1.,
                           misalign=0):
        """Check a real-to-complex FFT of ``dtype`` input in managed memory.

        Normally distributed data is multiplied by ``scale`` and cast to
        ``dtype`` before being transformed. Input and output arrays are
        created in the ``'cuda_managed'`` space, and the result is compared
        with ``gold_rfftn`` after ``stream_synchronize()``.

        Args:
            shape: Shape of the real input. Must be a tuple, because it is
                concatenated with a tuple when the padded shape is built.
            axes: Axes to transform; the last listed axis is the one whose
                output length becomes ``n // 2 + 1``.
            dtype: Numpy dtype the input is cast to before upload.
            scale: Factor applied before the cast and divided out again when
                building the reference.
            misalign: Number of leading elements dropped from the last axis
                so that the transformed view does not start at the beginning
                of the buffer.
        """
        samples = np.random.normal(size=shape).astype(np.float32)
        host_in = (samples * scale).astype(dtype)

        # Force misaligned data: widen the last axis by `misalign`, create the
        # array, then view everything past the first `misalign` elements.
        widened_last = shape[-1] + misalign
        host_in = np.resize(host_in, shape[:-1] + (widened_last, ))
        dev_in = bf.ndarray(host_in, space='cuda_managed')[..., misalign:]
        host_in = host_in[..., misalign:]

        # A real-to-complex transform keeps n // 2 + 1 bins on the last
        # transformed axis.
        halved_axis = axes[-1]
        out_shape = list(shape)
        out_shape[halved_axis] = shape[halved_axis] // 2 + 1
        dev_out = bf.ndarray(shape=out_shape,
                             dtype='cf32',
                             space='cuda_managed')

        plan = Fft()
        plan.init(dev_in, dev_out, axes=axes)
        plan.execute(dev_in, dev_out)
        # Synchronize before the output array is handed to `compare`.
        stream_synchronize()

        expected = gold_rfftn(host_in.astype(np.float32) / scale, axes=axes)
        compare(dev_out, expected)
示例#4
0
 def run_test_r2c(self, shape, axes):
     """Check a float32 real-to-complex FFT against ``gold_rfftn``.

     Args:
         shape: Shape of the real input.
         axes: Axes to transform; the last listed axis is the one whose
             output length becomes ``n // 2 + 1``.

     Raises:
         AssertionError: If the device result and the reference differ by
             more than ``RTOL`` / ``ATOL``.
     """
     host_in = np.random.uniform(size=shape).astype(np.float32)
     dev_in = bf.ndarray(host_in, space='cuda')

     # Only the last transformed axis shrinks in the complex output.
     halved_axis = axes[-1]
     out_shape = list(shape)
     out_shape[halved_axis] = shape[halved_axis] // 2 + 1
     dev_out = bf.ndarray(shape=out_shape, dtype='cf32', space='cuda')

     plan = Fft()
     plan.init(dev_in, dev_out, axes=axes)
     plan.execute(dev_in, dev_out)

     expected = gold_rfftn(host_in, axes=axes)
     np.testing.assert_allclose(dev_out.copy('system'),
                                expected,
                                rtol=RTOL,
                                atol=ATOL)
示例#5
0
 def run_test_c2r(self, shape, axes):
     """Check a complex-to-real FFT against ``gold_irfftn``.

     Args:
         shape: Shape of the real output.
         axes: Axes to transform; the last listed axis is the one that holds
             ``n // 2 + 1`` complex bins on the input side.

     Raises:
         AssertionError: If the device result and the reference differ by
             more than ``RTOL`` / ``ATOL``.
     """
     halved_axis = axes[-1]
     in_shape = list(shape)
     in_shape[halved_axis] = shape[halved_axis] // 2 + 1
     # Generate twice as many floats on the last axis so that consecutive
     # pairs can be viewed as complex64 values.
     in_shape[-1] *= 2
     floats = np.random.uniform(size=in_shape).astype(np.float32)
     host_in = floats.view(np.complex64)

     dev_in = bf.ndarray(host_in, space='cuda')
     dev_out = bf.ndarray(shape=shape, dtype='f32', space='cuda')
     plan = Fft()
     plan.init(dev_in, dev_out, axes=axes)
     plan.execute(dev_in, dev_out)

     # Note: Numpy applies normalization while CUFFT does not, so the
     # reference is multiplied by the product of the transformed lengths.
     lengths = [shape[d] for d in axes]
     norm = reduce(lambda acc, n: acc * n, lengths)
     expected = gold_irfftn(host_in, axes=axes) * norm
     np.testing.assert_allclose(dev_out.copy('system'),
                                expected,
                                rtol=RTOL,
                                atol=ATOL)
示例#6
0
 def run_test_c2r_impl(self, shape, axes, fftshift=False):
     """Check a complex-to-real FFT (optionally shifted) against the reference.

     Args:
         shape: Nominal shape of the real output. Along the last transformed
             axis the actual output length is ``(n // 2 + 1 - 1) * 2``, which
             equals ``n`` for even ``n`` and ``n - 1`` for odd ``n``.
         axes: Axes to transform; the last listed axis is the one that holds
             ``n // 2 + 1`` complex bins on the input side.
         fftshift: When true, ask the plan to apply an FFT shift and apply
             the matching inverse shift to the reference input.
     """
     ishape = list(shape)
     oshape = list(shape)
     ishape[axes[-1]] = shape[axes[-1]] // 2 + 1
     oshape[axes[-1]] = (ishape[axes[-1]] - 1) * 2
     ishape[-1] *= 2 # For complex
     known_data = np.random.normal(size=ishape).astype(np.float32).view(np.complex64)
     idata = bf.ndarray(known_data, space='cuda')
     odata = bf.ndarray(shape=oshape, dtype='f32', space='cuda')
     fft = Fft()
     fft.init(idata, odata, axes=axes, apply_fftshift=fftshift)
     fft.execute(idata, odata)
     # Note: Numpy applies normalization while CUFFT does not.
     # The factor must be the product of the *output* lengths: for an odd
     # nominal length the real output is one sample shorter than `shape`,
     # so normalising by `shape` would scale the reference incorrectly.
     # NOTE(review): assumes gold_irfftn uses numpy.fft.irfftn's default
     # output length of 2 * (m - 1) on the last axis -- confirm.
     norm = reduce(lambda a, b: a * b, [oshape[d] for d in axes])
     if fftshift:
         known_data = np.fft.ifftshift(known_data, axes=axes)
     known_result = gold_irfftn(known_data, axes=axes) * norm
     compare(odata.copy('system'), known_result)
示例#7
0
class FftBlock(TransformBlock):
    """Transform block that applies an FFT along selected axes of a stream.

    The transform mode is chosen per sequence from the input dtype and the
    ``real_output`` flag: real input with complex output is ``'r2c'``,
    complex input with real output is ``'c2r'``, anything else is ``'c2c'``.
    Only the ``'cuda'`` input space is accepted.

    Args:
        iring: Input ring, forwarded to ``TransformBlock``.
        axes: Axis or axes to transform. Each entry is either an integer
            index or a label string that is looked up in the input tensor's
            ``labels``. A single value, a list, or a tuple is accepted.
        inverse: Passed to the FFT at execution time to select the inverse
            transform.
        real_output: When true the output dtype is the real counterpart of
            the input dtype, otherwise the complex counterpart.
        axis_labels: Optional replacement label(s) for the transformed axes,
            matched to ``axes`` by position. A single value, a list, or a
            tuple is accepted. ``None`` leaves the existing labels untouched.
        apply_fftshift: Forwarded to the FFT plan as ``apply_fftshift``.
        *args: Forwarded to ``TransformBlock``.
        **kwargs: Forwarded to ``TransformBlock``.
    """

    # TODO: Add support for sizes (aka 's') parameter that defines transform
    #         length in each dimension (i.e., cropped/padded transforms).
    #         Should be able to do this using an input callback and padded
    #           output dims.
    def __init__(self,
                 iring,
                 axes,
                 inverse=False,
                 real_output=False,
                 axis_labels=None,
                 apply_fftshift=False,
                 *args,
                 **kwargs):
        super(FftBlock, self).__init__(iring, *args, **kwargs)
        # Promote a bare axis to a one-element list. Lists AND tuples are
        # sequences already; the former check
        # `not isinstance(x, list) or isinstance(x, tuple)` wrapped tuples
        # too because `not` binds tighter than `or`.
        if not isinstance(axes, (list, tuple)):
            axes = [axes]
        # Keep `None` as `None` so that on_sequence() can skip relabelling;
        # wrapping it into `[None]` used to overwrite labels with None.
        if axis_labels is not None and not isinstance(axis_labels,
                                                      (list, tuple)):
            axis_labels = [axis_labels]
        self.specified_axes = axes
        self.real_output = real_output
        self.inverse = inverse
        self.axis_labels = axis_labels
        self.apply_fftshift = apply_fftshift
        self.space = self.irings[0].space
        self.fft = Fft()
        # Shapes/strides the current plan was built for; None until the
        # first call to on_data().
        self.plan_ishape = None
        self.plan_oshape = None
        self.plan_istrides = None
        self.plan_ostrides = None

    def define_valid_input_spaces(self):
        """Return the memory spaces this block accepts as input."""
        return ('cuda', )

    def on_sequence(self, iseq):
        """Build the output header for a new input sequence.

        Resolves the requested axes, selects the transform mode, and adjusts
        the output tensor's dtype, shape, units, scales and labels.

        Args:
            iseq: Input sequence; its ``header['_tensor']`` describes the
                input data.

        Returns:
            A deep copy of the input header with the tensor description
            updated for the transformed output.

        Raises:
            KeyError: If one of the requested axes is the frame axis (the
                axis whose shape entry is ``-1``).
        """
        ihdr = iseq.header
        itensor = ihdr['_tensor']
        # TODO: DataType cast should be done inside ring2
        #         **This tensor stuff generally needs to be cleaned up
        itype = DataType(itensor['dtype'])
        # TODO: This is slightly hacky; it needs to emulate the type casting
        #         that Bifrost does internally for the FFT.
        itype = itype.as_floating_point()

        # Get axis indices, allowing for lookup-by-label
        self.axes = [
            itensor['labels'].index(axis)
            if isinstance(axis, basestring) else axis
            for axis in self.specified_axes
        ]

        axes = self.axes
        # Transform lengths, one per requested axis, in the same order.
        shape = [itensor['shape'][ax] for ax in axes]

        otype = itype.as_real() if self.real_output else itype.as_complex()
        ohdr = deepcopy(ihdr)
        otensor = ohdr['_tensor']
        otensor['dtype'] = str(otype)
        if itype.is_real and otype.is_complex:
            self.mode = 'r2c'
        elif itype.is_complex and otype.is_real:
            self.mode = 'c2r'
        else:
            self.mode = 'c2c'
        frame_axis = itensor['shape'].index(-1)
        if frame_axis in axes:
            raise KeyError(
                "Cannot transform frame axis; reshape the data stream first")

        # Adjust output shape for real transforms
        if self.mode == 'r2c':
            # n real samples -> n // 2 + 1 complex bins on the last axis.
            otensor['shape'][axes[-1]] //= 2
            otensor['shape'][axes[-1]] += 1
        elif self.mode == 'c2r':
            # m complex bins -> (m - 1) * 2 real samples on the last axis.
            otensor['shape'][axes[-1]] -= 1
            otensor['shape'][axes[-1]] *= 2
            # Keep the transform length used for the scales below in step
            # with the real-valued output length.
            shape[-1] -= 1
            shape[-1] *= 2

        for i, (ax, length) in enumerate(zip(axes, shape)):
            if 'units' in otensor:
                units = otensor['units'][ax]
                otensor['units'][ax] = transform_units(units, -1)
            if 'scales' in otensor:
                otensor['scales'][ax][0] = 0  # TODO: Is this OK?
                scale = otensor['scales'][ax][1]
                # New step is the reciprocal of the total input extent.
                otensor['scales'][ax][1] = 1. / (scale * length)
            if 'labels' in otensor and self.axis_labels is not None:
                otensor['labels'][ax] = self.axis_labels[i]
        return ohdr

    def on_data(self, ispan, ospan):
        """Run the FFT on one span, rebuilding the plan when needed.

        The plan is (re)initialised whenever the shape or strides of the
        input or output data differ from those the current plan was built
        with. Workspace memory of ``self.fft.workspace_size`` bytes is taken
        from the block's temporary storage for the duration of the call.

        Args:
            ispan: Input span; ``ispan.data`` is the array to transform.
            ospan: Output span; ``ospan.data`` receives the result.
        """
        idata = ispan.data
        odata = ospan.data
        # Check if shapes or strides have changed
        if (idata.shape != self.plan_ishape or odata.shape != self.plan_oshape
                or idata.strides != self.plan_istrides
                or odata.strides != self.plan_ostrides):
            # (Re-)generate the FFT plan
            self.fft.init(idata,
                          odata,
                          axes=self.axes,
                          apply_fftshift=self.apply_fftshift)
            self.plan_ishape = idata.shape
            self.plan_oshape = odata.shape
            self.plan_istrides = idata.strides
            self.plan_ostrides = odata.strides
        size = self.fft.workspace_size
        with self.get_temp_storage(self.space).allocate(size) as workspace:
            self.fft.execute_workspace(idata,
                                       odata,
                                       workspace.ptr,
                                       workspace.size,
                                       inverse=self.inverse)