def forward(self, axis, gamma, x, x_layout, xp, expander,
            beta, eps, decay, running_mean, running_var):
    if running_mean is not None:
        mean = running_mean
        var = running_var
    else:
        # Create dummies.
        mean = xp.zeros_like(gamma, dtype=x.dtype)
        var = xp.zeros_like(gamma, dtype=x.dtype)

    # mean and inv_std are used as buffers to save intermediate results
    # computed during the forward pass. These buffers are used to speed
    # up the backward pass.
    cudnn_x_layout = cuda._get_cudnn_tensor_layout_x(x_layout)
    reserve_space, y, mean, inv_std = (
        cudnn.batch_normalization_forward_training_ex(
            x, gamma, beta, mean, var, None, None, eps,
            decay, self.is_for_conv2d, self.cudnn_mode,
            chainer.is_debug(), d_layout=cudnn_x_layout))
    y_layout = x_layout
    return (y, y_layout, running_mean, running_var, mean, var, inv_std,
            reserve_space)
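
# Usage sketch for the cuDNN batch-normalization forward path above. This
# is a minimal example, assuming the public chainer.functions
# .batch_normalization API; the shapes and variable names are illustrative,
# not from the original source. With CuPy arrays and use_cudnn set to
# 'always', Chainer dispatches to the cuDNN implementation when available
# (NumPy arrays fall back to the CPU path).
import numpy as np
import chainer
import chainer.functions as F

x = chainer.Variable(np.random.randn(8, 3, 32, 32).astype(np.float32))
gamma = chainer.Variable(np.ones(3, dtype=np.float32))
beta = chainer.Variable(np.zeros(3, dtype=np.float32))
running_mean = np.zeros(3, dtype=np.float32)
running_var = np.ones(3, dtype=np.float32)

with chainer.using_config('use_cudnn', 'always'):
    # running_mean / running_var are updated in place, mirroring the
    # decay argument of the forward implementation above.
    y = F.batch_normalization(
        x, gamma, beta, eps=2e-5, running_mean=running_mean,
        running_var=running_var, decay=0.9)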
def _forward_cudnn(self, x, W, b, y, input_layouts):
    x_layout, w_layout = input_layouts
    self.output_layouts = (x_layout,)

    pad = (self.ph, self.pw)
    stride = (self.sy, self.sx)
    dilation = (self.dy, self.dx)
    auto_tune = configuration.config.autotune
    tensor_core = configuration.config.use_cudnn_tensor_core
    # Translate Chainer memory layouts into cuDNN tensor layouts.
    cudnn_x_layout = cuda._get_cudnn_tensor_layout_x(x_layout)
    cudnn_w_layout = cuda._get_cudnn_tensor_layout_w(w_layout)
    cuda.cudnn.convolution_forward(
        x, W, b, y, pad, stride, dilation, self.groups,
        auto_tune=auto_tune, tensor_core=tensor_core,
        d_layout=cudnn_x_layout, w_layout=cudnn_w_layout)
    return y,
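
# Usage sketch for the cuDNN convolution forward path above: a minimal
# example, assuming the public chainer.functions.convolution_2d API. The
# autotune config below corresponds to the auto_tune flag read from
# configuration.config in _forward_cudnn; shapes are illustrative only.
import numpy as np
import chainer
import chainer.functions as F

x = np.random.randn(8, 3, 32, 32).astype(np.float32)   # NCHW input
W = np.random.randn(16, 3, 3, 3).astype(np.float32)    # (out_ch, in_ch, kh, kw)
b = np.zeros(16, dtype=np.float32)

with chainer.using_config('use_cudnn', 'always'), \
        chainer.using_config('autotune', True):
    # On GPU (CuPy arrays) this dispatches to cuda.cudnn.convolution_forward.
    y = F.convolution_2d(x, W, b, stride=1, pad=1)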
def _forward_cudnn(self, x, gy):
    x_layout, gy_layout = self.input_layouts
    w_layout = self.w_layout
    # Allocate the filter gradient in the raw (layout-transposed) shape.
    w_raw_shape = memory_layouts._transpose_shape(
        self.W_shape, None, w_layout)
    gW = cuda.cupy.empty(w_raw_shape, dtype=self.W_dtype)

    pad = (self.ph, self.pw)
    stride = (self.sy, self.sx)
    dilation = (self.dy, self.dx)
    deterministic = configuration.config.cudnn_deterministic
    auto_tune = configuration.config.autotune
    tensor_core = configuration.config.use_cudnn_tensor_core
    cudnn_x_layout = cuda._get_cudnn_tensor_layout_x(x_layout)
    cudnn_w_layout = cuda._get_cudnn_tensor_layout_w(w_layout)
    cuda.cudnn.convolution_backward_filter(
        x, gy, gW, pad, stride, dilation, self.groups,
        deterministic=deterministic, auto_tune=auto_tune,
        tensor_core=tensor_core, d_layout=cudnn_x_layout,
        w_layout=cudnn_w_layout)
    return gW,
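
# Sketch of how the filter-gradient kernel above is reached in practice:
# backprop through a convolution produces gW via
# cuda.cudnn.convolution_backward_filter when running on GPU. A minimal
# example with the public API; names and shapes are illustrative.
import numpy as np
import chainer
import chainer.functions as F

x = chainer.Variable(np.random.randn(2, 3, 8, 8).astype(np.float32))
W = chainer.Parameter(np.random.randn(4, 3, 3, 3).astype(np.float32))

y = F.convolution_2d(x, W, pad=1)
y.grad = np.ones(y.shape, dtype=np.float32)  # upstream gradient gy
y.backward()
# W.grad now holds the filter gradient computed by the backward-filter pass.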
def _forward_cudnn(self, x, W, b, input_layouts):
    x_layout, w_layout = input_layouts
    self.output_layouts = (x_layout,)

    n = len(x)
    _, c, _, _ = memory_layouts._transpose_shape(W.shape, w_layout, None)
    # Allocate the output in the raw shape corresponding to x_layout.
    y_raw_shape = memory_layouts._transpose_shape(
        (n, c * self.groups, self.outh, self.outw), None, x_layout)
    y = cuda.cupy.empty(y_raw_shape, dtype=x.dtype)

    pad = (self.ph, self.pw)
    stride = (self.sy, self.sx)
    dilation = (self.dy, self.dx)
    deterministic = configuration.config.cudnn_deterministic
    auto_tune = configuration.config.autotune
    tensor_core = configuration.config.use_cudnn_tensor_core
    cudnn_x_layout = cuda._get_cudnn_tensor_layout_x(x_layout)
    cudnn_w_layout = cuda._get_cudnn_tensor_layout_w(w_layout)
    cuda.cudnn.convolution_backward_data(
        W, x, b, y, pad, stride, dilation, self.groups,
        deterministic=deterministic, auto_tune=auto_tune,
        tensor_core=tensor_core, d_layout=cudnn_x_layout,
        w_layout=cudnn_w_layout)
    return y,
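
# The function above runs cuDNN's backward-data kernel as the forward pass
# of a transposed convolution. A minimal sketch, assuming the public
# chainer.functions.deconvolution_2d API; shapes are illustrative.
import numpy as np
import chainer
import chainer.functions as F

x = np.random.randn(2, 4, 8, 8).astype(np.float32)
# For deconvolution the filter is (in_ch, out_ch, kh, kw).
W = np.random.randn(4, 3, 4, 4).astype(np.float32)

with chainer.using_config('use_cudnn', 'always'):
    # With stride 2 and pad 1, the 8x8 input is upsampled to 16x16.
    y = F.deconvolution_2d(x, W, stride=2, pad=1)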
def backward(self, axis, gamma, gy, x, x_layout, xp, expander,
             mean, inv_std, eps, var, forward_data):
    cudnn_x_layout = cuda._get_cudnn_tensor_layout_x(x_layout)
    gx, ggamma, gbeta = cudnn.batch_normalization_backward(
        x, gamma, gy, mean, inv_std, eps, self.is_for_conv2d,
        self.cudnn_mode, chainer.is_debug(), d_layout=cudnn_x_layout,
        reserve_space=forward_data)
    gx = gx.astype(x.dtype, copy=False)
    ggamma = ggamma.astype(gamma.dtype, copy=False)
    gbeta = gbeta.astype(gamma.dtype, copy=False)
    gx_layout = x_layout
    return gx, gx_layout, ggamma, gbeta
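
# Sketch of the path that reaches the cuDNN batch-normalization backward
# above: backprop through F.batch_normalization yields gx, ggamma and gbeta.
# A minimal example with the public API; shapes are illustrative.
import numpy as np
import chainer
import chainer.functions as F

x = chainer.Variable(np.random.randn(8, 3, 16, 16).astype(np.float32))
gamma = chainer.Parameter(np.ones(3, dtype=np.float32))
beta = chainer.Parameter(np.zeros(3, dtype=np.float32))

y = F.batch_normalization(x, gamma, beta, eps=2e-5)
y.grad = np.ones(y.shape, dtype=np.float32)  # upstream gradient gy
y.backward()
# x.grad, gamma.grad and beta.grad correspond to gx, ggamma and gbeta.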