def call(self):
    """Run one cuDNN backward-data convolution using the pre-built state.

    All tensors (``W``, ``gy``, ``b``, ``gx``) and convolution settings are
    read from attributes prepared elsewhere (presumably by a setup method of
    this benchmark/test fixture — confirm against the enclosing class).
    """
    cudnn.convolution_backward_data(
        self.W, self.gy, self.b, self.gx,
        self.pads, self.strides, self.dilations, self.groups,
        deterministic=self.deterministic,
        auto_tune=self.auto_tune,
        tensor_core=self.tensor_core,
    )
def test_backward_data(self):
    """convolution_backward_data must raise RuntimeError for NHWC layouts
    when ``tensor_core='always'`` is requested with these settings.
    """
    if self.layout != libcudnn.CUDNN_TENSOR_NHWC:
        # Bug fix: the original *returned* unittest.SkipTest(), which is
        # silently discarded and makes the test pass without running.
        # SkipTest must be raised for the runner to record a skip.
        raise unittest.SkipTest('only applicable to NHWC layout')
    with self.assertRaises(RuntimeError):
        cudnn.convolution_backward_data(
            self.W, self.gy, None, self.gx,
            pad=(self.pad, self.pad),
            stride=(self.stride, self.stride),
            dilation=(1, 1),
            groups=1,
            deterministic=0,
            auto_tune=self.auto_tune,
            tensor_core='always',
            d_layout=self.layout,
            w_layout=self.layout,
        )
def _convolve_data_adjoint_cuda(output, filt, data_shape, mode='full',
                                strides=None, multi_channel=False):
    """Compute the adjoint of convolution w.r.t. the data, via cuDNN.

    Args:
        output: convolution output array (gradient w.r.t. the output).
        filt: filter array.
        data_shape: shape of the data array to reconstruct.
        mode: 'full' or 'valid' convolution mode.
        strides: optional per-dimension strides.
        multi_channel: whether input/output have channel dimensions.

    Returns:
        Array of shape ``data_shape`` holding the adjoint result.

    Raises:
        ValueError: if the spatial dimension is unsupported or ``mode``
            is not 'full'/'valid'.
    """
    xp = backend.get_array_module(output)
    D, b, B, m, n, s, c_i, c_o, p = _get_convolve_params(
        data_shape, filt.shape, mode, strides, multi_channel)

    if D == 1:
        # cuDNN supports only 2D/3D spatial convolutions: promote 1D input
        # to 2D with a trailing singleton axis, recurse, then squeeze it out.
        return _convolve_data_adjoint_cuda(
            xp.expand_dims(output, -1),
            xp.expand_dims(filt, -1),
            list(data_shape) + [1],
            mode=mode,
            strides=list(strides) + [1] if strides is not None else None,
            multi_channel=multi_channel).squeeze(-1)
    elif D > 3:
        raise ValueError(
            f'cuDNN convolution only supports 1, 2 or 3D, got {D}.')

    dilations = (1, ) * D
    groups = 1
    auto_tune = True
    tensor_core = 'auto'
    deterministic = False
    if mode == 'full':
        pads = tuple(n_d - 1 for n_d in n)
    elif mode == 'valid':
        pads = (0, ) * D
    else:
        # Bug fix: an unrecognized mode previously left `pads` unbound and
        # surfaced later as an opaque NameError.
        raise ValueError(f'Invalid mode, got {mode}.')

    output = output.reshape((B, c_o) + p)
    filt = filt.reshape((c_o, c_i) + n)
    data = xp.empty((B, c_i) + m, dtype=output.dtype)
    # cuDNN backward-data computes correlation; flip the filter spatially so
    # the overall operation is the adjoint of convolution.
    filt = util.flip(filt, axes=range(-D, 0))
    cudnn.convolution_backward_data(
        filt, output, None, data,
        pads, s, dilations, groups,
        deterministic=deterministic,
        auto_tune=auto_tune,
        tensor_core=tensor_core)

    # Undo the batch/channel packing applied above.
    data = data.reshape(data_shape)

    return data
def _convolve_data_adjoint_cuda(output, filt, data_shape, mode='full',
                                strides=None, multi_channel=False):
    """Compute the adjoint of convolution w.r.t. the data on a GPU device.

    Args:
        output: convolution output array (gradient w.r.t. the output).
        filt: filter array.
        data_shape: shape of the data array to reconstruct.
        mode: 'full' or 'valid' convolution mode.
        strides: optional per-dimension strides.
        multi_channel: whether input/output have channel dimensions.

    Returns:
        Array of shape ``data_shape`` holding the adjoint result.

    Raises:
        ValueError: if ``mode`` is not 'full'/'valid'.
    """
    device = backend.get_device(output)
    xp = device.xp
    D, b, B, m, n, s, c_i, c_o, p = _get_convolve_params(
        data_shape, filt.shape, mode, strides, multi_channel)
    dilations = (1, ) * D
    groups = 1
    auto_tune = True
    tensor_core = 'auto'
    deterministic = False
    if mode == 'full':
        pads = tuple(n_d - 1 for n_d in n)
    elif mode == 'valid':
        pads = (0, ) * D
    else:
        # Bug fix: an unrecognized mode previously left `pads` unbound and
        # surfaced later as an opaque NameError.
        raise ValueError(f'Invalid mode, got {mode}.')

    with device:
        output = output.reshape((B, c_o) + p)
        filt = filt.reshape((c_o, c_i) + n)
        data = xp.empty((B, c_i) + m, dtype=output.dtype)
        # cuDNN backward-data computes correlation; flip the filter spatially
        # so the overall operation is the adjoint of convolution.
        filt = util.flip(filt, axes=range(-D, 0))
        cudnn.convolution_backward_data(
            filt, output, None, data,
            pads, s, dilations, groups,
            deterministic=deterministic,
            auto_tune=auto_tune,
            tensor_core=tensor_core)

        # Undo the batch/channel packing applied above.
        data = data.reshape(data_shape)

    return data
def _cudnn_convolve_adjoint_input(W, y, mode='full'):
    """Compute the adjoint of convolution w.r.t. the input, via cuDNN.

    Complex inputs are handled by recursing on a real-valued embedding:
    real/imaginary parts are stacked along the channel axes so that one
    real convolution realizes the complex one.

    Args:
        W: filter array, shape (output_channel, input_channel, *filter_shape).
        y: output array, shape (batch, output_channel, *output_shape).
        mode: 'full' or 'valid' convolution mode.

    Returns:
        Input-gradient array of shape (batch, input_channel, *input_shape).

    Raises:
        ValueError: if ``mode`` is not 'full'/'valid'.
    """
    dtype = y.dtype
    device = backend.get_device(y)
    xp = device.xp
    if np.issubdtype(dtype, np.complexfloating):
        with device:
            Wr = xp.real(W)
            Wi = xp.imag(W)
            yr = xp.real(y)
            yi = xp.imag(y)

            # Concatenate real and imaginary to input/output channels:
            # [[Wr, -Wi], [Wi, Wr]] implements complex multiplication
            # with real arithmetic.
            y = xp.concatenate([yr, yi], axis=1)
            W = xp.concatenate([
                xp.concatenate([Wr, -Wi], axis=1),
                xp.concatenate([Wi, Wr], axis=1)
            ], axis=0)

            x = _cudnn_convolve_adjoint_input(W, y, mode=mode)

            # Convert back to complex: first half of channels is the real
            # part, second half the imaginary part.
            x = x[:, :x.shape[1] // 2] + 1j * x[:, x.shape[1] // 2:]
            x = x.astype(dtype)

            return x

    ndim = y.ndim - 2
    batch_size = len(y)
    input_channel = W.shape[1]
    output_shape = y.shape[-ndim:]
    filter_shape = W.shape[-ndim:]
    strides = (1, ) * ndim
    dilations = (1, ) * ndim
    groups = 1
    auto_tune = True
    tensor_core = 'auto'
    deterministic = False
    if mode == 'full':
        input_shape = tuple(p - n + 1
                            for p, n in zip(output_shape, filter_shape))
        pads = tuple(n - 1 for n in W.shape[2:])
    elif mode == 'valid':
        input_shape = tuple(p + n - 1
                            for p, n in zip(output_shape, filter_shape))
        pads = (0, ) * ndim
    else:
        # Bug fix: an unrecognized mode previously left `input_shape` and
        # `pads` unbound and surfaced later as an opaque NameError.
        raise ValueError(f'Invalid mode, got {mode}.')

    with device:
        x = xp.empty((batch_size, input_channel) + input_shape, dtype=dtype)
        # cuDNN backward-data computes correlation; flip the filter spatially
        # so the overall operation is the adjoint of convolution.
        W = util.flip(W, axes=range(-ndim, 0))
        cudnn.convolution_backward_data(
            W, y, None, x,
            pads, strides, dilations, groups,
            deterministic=deterministic,
            auto_tune=auto_tune,
            tensor_core=tensor_core)

    return x