import pytest
import numpy as np

import nnabla.functions as F
import refs


def test_affine_double_backward(seed, base_axis, weight_shape, bias, ctx, func_name):
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.affine import AffineDataGrad, AffineFilterGrad
    rng = np.random.RandomState(seed)
    # Input
    inputs = [rng.randn(2, 3, 4).astype(np.float32)]
    # Weight
    inputs += [rng.randn(*weight_shape).astype(np.float32)]
    # Bias
    if bias:
        inputs += [rng.randn(*weight_shape[1:]).astype(np.float32) * 1e2]
    else:
        inputs += [None]
    func_args = [base_axis]
    # Affine
    backward_function_tester(rng, F.affine, inputs, func_args=func_args,
                             dstep=1e-3, ctx=ctx)

    # DataGrad
    df, y = grad_function_forward_function_output(AffineDataGrad,
                                                  F.affine, ctx, inputs, *func_args)
    df.xshape = inputs[0].shape
    ginputs = [rng.randn(*y.shape), inputs[1]]
    backward_function_tester(rng, df, ginputs, func_args=[],
                             atol_accum=2e-2, dstep=1e-3, ctx=ctx,
                             non_accum_check=True)

    # FilterGrad
    df, y = grad_function_forward_function_output(AffineFilterGrad,
                                                  F.affine, ctx, inputs, *func_args)
    df.wshape = inputs[1].shape
    ginputs = [rng.randn(*y.shape), inputs[0]]
    backward_function_tester(rng, df, ginputs, func_args=[],
                             dstep=1e-3, ctx=ctx, non_accum_check=True)
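
# Note: every double-backward test in this file follows the same two-step
# pattern: (1) verify 2nd-order gradients by running backward_function_tester
# on the forward function itself, then (2) verify 3rd-order gradients by
# materializing the registered backward function (e.g. AffineDataGrad) with
# grad_function_forward_function_output and testing *its* backward pass.
# A minimal sketch of the pattern, assuming a hypothetical forward function
# F.my_func with a registered MyFuncDataGrad backward function (the helper
# signatures follow their use in this file):
#
#   df, y = grad_function_forward_function_output(
#       MyFuncDataGrad, F.my_func, ctx, inputs, *func_args)
#   df.xshape = inputs[0].shape          # shape hint consumed by the grad op
#   ginputs = [rng.randn(*y.shape)]      # incoming output gradient
#   backward_function_tester(rng, df, ginputs, func_args=[], ctx=ctx,
#                            non_accum_check=True)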


def test_embed_double_backward(seed, shape_x, shape_w, ctx, func_name):
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.embed import EmbedFilterGrad
    rng = np.random.RandomState(seed)
    n_class = shape_w[0]
    x = rng.randint(0, n_class - 1, shape_x).astype(np.int32)
    w = rng.randn(*shape_w).astype(np.float32)
    inputs = [x, w]
    # Embed
    backward_function_tester(rng, F.embed, inputs, ctx=ctx,
                             backward=[False, True])

    # FilterGrad
    df, y = grad_function_forward_function_output(EmbedFilterGrad,
                                                  F.embed, ctx, inputs)
    df.wshape = inputs[1].shape
    ginputs = [rng.randn(*y.shape), inputs[0]]
    backward_function_tester(rng, df, ginputs, func_args=[],
                             backward=[True, False], atol_accum=3e-2,
                             dstep=1e-3, ctx=ctx, non_accum_check=True)


def test_concatenate_double_backward(seed, axis, different_size, num_inputs, ctx, func_name):
    from nbla_test_utils import cap_ignore_region, backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.concatenate import ConcatenateDataGrad
    rng = np.random.RandomState(seed)
    shape0 = [2, 3, 4]
    inputs = []
    for i in range(num_inputs):
        inputs.append(rng.randn(*shape0).astype(np.float32))
        shape0[axis] += int(different_size)
    func_kwargs = dict(axis=axis)
    # 2nd-order
    backward_function_tester(rng, F.concatenate, inputs=inputs,
                             func_args=[], func_kwargs=func_kwargs,
                             atol_accum=1e-2, dstep=1e-3, ctx=ctx)

    # 3rd-order
    df, y = grad_function_forward_function_output(ConcatenateDataGrad,
                                                  F.concatenate, ctx, inputs,
                                                  *[], **func_kwargs)
    df.xshapes = [x.shape for x in inputs]
    ginputs = [rng.randn(*y.shape)]
    backward_function_tester(rng, df, ginputs,
                             ctx=ctx, non_accum_check=True)


def test_sum_pooling_3d_double_backward(seed, inshape, kernel, stride, pad,
                                        ignore_border, channel_last, ctx, func_name):
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.sum_pooling import SumPoolingDataGrad
    if channel_last and not func_name.endswith('Cudnn'):
        pytest.skip('Channel last is only supported in Cudnn so far')
    if channel_last:
        t = refs.ChannelLastToFirstTranspose(len(inshape), len(kernel))
        inshape = tuple(inshape[i] for i in t.inv_axes)
    if not ignore_border and func_name.endswith('Cudnn'):
        pytest.skip('ignore_border=False in Cudnn is not supported.')
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(*inshape).astype(np.float32)]
    func_args = [kernel, stride, ignore_border, pad, channel_last]
    # 2nd-order
    backward_function_tester(rng, F.sum_pooling, inputs=inputs,
                             func_args=func_args, ctx=ctx)

    # 3rd-order
    df, y = grad_function_forward_function_output(SumPoolingDataGrad,
                                                  F.sum_pooling, ctx, inputs, *func_args)
    df.xshape = inputs[0].shape
    ginputs = [rng.randn(*y.shape)]
    backward_function_tester(rng, df, inputs=ginputs,
                             ctx=ctx, atol_accum=3e-2, non_accum_check=True)


def test_average_pooling_2d_double_backward(seed, inshape, kernel, stride, pad,
                                            ignore_border, channel_last,
                                            including_pad, ctx, func_name):
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.average_pooling import AveragePoolingDataGrad
    if channel_last and not func_name.endswith('Cudnn'):
        pytest.skip('Channel last is only supported in Cudnn so far')
    if channel_last:
        t = refs.ChannelLastToFirstTranspose(len(inshape), len(kernel))
        inshape = tuple(inshape[i] for i in t.inv_axes)
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(*inshape).astype(np.float32)]
    func_args = [kernel, stride, ignore_border, pad, channel_last, including_pad]
    # 2nd-order
    backward_function_tester(rng, F.average_pooling, inputs=inputs,
                             func_args=func_args, ctx=ctx)

    # 3rd-order
    average_pooling_data_grad, y = grad_function_forward_function_output(
        AveragePoolingDataGrad, F.average_pooling, ctx, inputs, *func_args)
    average_pooling_data_grad.xshape = inputs[0].shape
    ginputs = [rng.randn(*y.shape)]
    backward_function_tester(rng, average_pooling_data_grad, inputs=ginputs,
                             func_args=[], ctx=ctx)


def test_pad_constant_double_backward(seed, ctx, func_name, inshape, pad_width, constant_value):
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.pad import PadDataGrad
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(*inshape).astype(np.float32)]
    func_args = [pad_width, "constant", constant_value]
    # 2nd-order
    backward_function_tester(rng, F.pad, inputs, ctx=ctx, func_args=func_args)

    # 3rd-order
    # The constant value is always zero after the 1st-order derivative.
    func_args = [pad_width, "constant", 0]
    df, y = grad_function_forward_function_output(PadDataGrad,
                                                  F.pad, ctx, inputs, *func_args)
    df.xshape = inputs[0].shape
    ginputs = [rng.randn(*y.shape)]
    backward_function_tester(rng, df, ginputs, func_args=[],
                             ctx=ctx, atol_f=1e-6, atol_accum=5e-2,
                             non_accum_check=True)


def test_transpose_double_backward(seed, inshape, axes, ctx, func_name):
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.transpose import TransposeDataGrad
    rng = np.random.RandomState(seed)
    # Input
    inputs = [rng.randn(*inshape).astype(np.float32)]
    func_args = [axes]
    # 2nd-order
    backward_function_tester(rng, F.transpose, inputs, func_args=func_args, ctx=ctx)

    # 3rd-order
    df, y = grad_function_forward_function_output(TransposeDataGrad,
                                                  F.transpose, ctx, inputs, *func_args)
    df.xshape = inputs[0].shape
    ginputs = [rng.randn(*y.shape)]
    backward_function_tester(rng, df, ginputs, func_args=[],
                             ctx=ctx, non_accum_check=True)


def test_unpooling_double_backward(seed, inshape, kernel, channel_last, ctx, func_name):
    if channel_last and func_name == "Unpooling":
        pytest.skip("Unpooling with channel_last is only supported in CUDA.")
    if channel_last and len(inshape) == len(kernel):
        pytest.skip(
            "len(input shape) == len(kernel) is only valid for the channel first.")
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.unpooling import UnpoolingDataGrad
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(*inshape).astype(np.float32)]
    func_args = [kernel, channel_last]
    # 2nd-order
    backward_function_tester(rng, F.unpooling, inputs=inputs,
                             func_args=func_args, ctx=ctx)

    # 3rd-order
    df, y = grad_function_forward_function_output(UnpoolingDataGrad,
                                                  F.unpooling, ctx, inputs, *func_args)
    df.xshape = inputs[0].shape
    ginputs = [rng.randn(*y.shape)]
    backward_function_tester(rng, df, inputs=ginputs,
                             ctx=ctx, non_accum_check=True)


def test_fused_batch_normalization_double_backward(
        seed, axis, decay_rate, eps, nonlinearity, output_stat, batch_stat,
        add, ctx, func_name, no_scale, no_bias, no_mean, no_variance):
    import platform
    if platform.system() == 'Windows' and len(ctx.backend) > 1:
        pytest.skip("Currently not working with CUDA/cuDNN on the Windows platform.")  # TODO
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.fused_batch_normalization import FusedBatchNormalizationBackward
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs(rng, axis, add))
    axes = [axis]
    func_args = [axes, decay_rate, eps, batch_stat, nonlinearity, output_stat]
    inputs = mask_inputs(inputs, no_scale, no_bias, no_mean, no_variance)
    insert_identity = []
    if batch_stat:
        insert_identity = [True, True, True, False, False, False]
    # 2nd-order
    backward = [True, True, True, False, False, add] if batch_stat else \
        [False, False, False, False, False, False]
    backward_function_tester(rng, F.fused_batch_normalization, inputs,
                             func_args=func_args, backward=backward, ctx=ctx,
                             insert_identity=insert_identity)

    # 3rd-order
    func_args = func_args[:-1]
    fused_batch_normalization_backward, y = \
        grad_function_forward_function_output(FusedBatchNormalizationBackward,
                                              F.fused_batch_normalization,
                                              ctx, inputs, *func_args)
    fused_batch_normalization_backward.is_add = add
    ginputs = [rng.randn(*y.shape)] + inputs + [rng.randn(*y.shape)] if add else \
        [rng.randn(*y.shape)] + inputs[:-1] + [rng.randn(*y.shape)]
    backward_function_tester(
        rng, fused_batch_normalization_backward, inputs=ginputs, func_args=[],
        backward=[True, True, False, True, False, False, False, add],
        ctx=ctx, atol_accum=5e-2, dstep=1e-3, non_accum_check=True)


def test_global_average_pooling_double_backward(seed, fname, ctx, func_name):
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.global_average_pooling import GlobalAveragePoolingDataGrad
    rng = np.random.RandomState(seed)
    ref_func = eval('ref_' + fname)
    func = getattr(F, fname)
    inputs = [rng.random_sample((2, 3, 4, 5))]
    # 2nd-order
    backward_function_tester(rng, func, inputs, ctx=ctx)

    # 3rd-order
    df, y = grad_function_forward_function_output(GlobalAveragePoolingDataGrad,
                                                  F.global_average_pooling,
                                                  ctx, inputs)
    df.xshape = inputs[0].shape
    ginputs = [rng.randn(*y.shape)]
    backward_function_tester(rng, df, inputs=ginputs,
                             ctx=ctx, atol_f=1e-6, atol_accum=1e-2,
                             non_accum_check=True)


def test_slice_double_backward(seed, inshape, start, stop, step, ctx, fname):
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.slice import SliceDataGrad
    rng = np.random.RandomState(seed)
    x = rng.randn(*inshape).astype(np.float32)
    func_args = [start, stop, step]
    # 2nd-order
    backward_function_tester(rng, F.slice, [x], ctx=ctx, func_args=func_args)

    # 3rd-order
    df, y = grad_function_forward_function_output(SliceDataGrad,
                                                  F.slice, ctx, [x], *func_args)
    df.xshape = x.shape
    ginputs = [rng.randn(*y.shape)]
    backward_function_tester(rng, df, ginputs, func_args=[],
                             ctx=ctx, non_accum_check=True)


def test_deconvolution_2d_double_backward(inshape, kernel, outmaps, pad, stride,
                                          dilation, group, with_bias, channel_last,
                                          output_padding, seed, ctx, func_name):
    from nbla_test_utils import function_tester, backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.deconvolution import DeconvolutionDataGrad, DeconvolutionFilterGrad
    if channel_last and not func_name.endswith('Cudnn'):
        pytest.skip('channel_last=True is only supported in CUDNN backend.')
    base_axis = len(inshape) - len(kernel) - 1
    inmaps = inshape[base_axis]
    if channel_last:
        t = refs.ChannelLastToFirstTranspose(len(inshape), len(kernel))
        inshape = tuple(inshape[i] for i in t.inv_axes)
    rng = np.random.RandomState(seed)
    i = np.clip(rng.randn(*inshape).astype(np.float32), -0.5, 0.5)
    kshape = (inmaps,) + (outmaps // group,) + kernel
    if channel_last:
        t = refs.ChannelLastToFirstTranspose(len(kshape), len(kernel))
        kshape = tuple(kshape[i] for i in t.inv_axes)
    k = np.clip(rng.randn(*kshape).astype(np.float32), -0.5, 0.5)
    base_axis = len(inshape) - 3
    b = None
    if with_bias:
        b = np.clip(rng.randn(outmaps).astype(np.float32), -0.5, 0.5)
    inputs = [i, k, b]
    func_args = [base_axis, pad, stride, dilation, group, channel_last,
                 output_padding]
    # Deconvolution
    backward_function_tester(rng, F.deconvolution, inputs,
                             func_args=func_args, ctx=ctx, atol_accum=1e-1)

    # DataGrad
    df, y = grad_function_forward_function_output(DeconvolutionDataGrad,
                                                  F.deconvolution, ctx, inputs,
                                                  *func_args)
    df.xshape = i.shape
    ginputs = [rng.randn(*y.shape), k]
    backward_function_tester(rng, df, ginputs,
                             ctx=ctx, atol_accum=1e-1, non_accum_check=True)

    # FilterGrad
    df, y = grad_function_forward_function_output(DeconvolutionFilterGrad,
                                                  F.deconvolution, ctx, inputs,
                                                  *func_args)
    df.wshape = k.shape
    ginputs = [rng.randn(*y.shape), i]
    backward_function_tester(rng, df, ginputs, func_args=[],
                             ctx=ctx, atol_accum=1e-1, non_accum_check=True)


def core_test_convolution_double_backward(inshape, kernel, outmaps, pad, stride,
                                          dilation, group, channel_last, with_bias,
                                          seed, ctx, func_name, non_accum_check=True,
                                          atol_f=1e-4, atol_b=1e-3,
                                          atol_accum=8e-2, dstep=1e-3):
    from nbla_test_utils import backward_function_tester, grad_function_forward_function_output
    from nnabla.backward_function.convolution import ConvolutionDataGrad, ConvolutionFilterGrad
    if func_name == 'ConvolutionCuda':
        pytest.skip('CUDA Convolution N-D is only supported in CUDNN extension')
    if channel_last and not func_name.endswith('Cudnn'):
        pytest.skip('channel_last=True is only supported in CUDNN backend so far.')
    if channel_last and func_name.endswith('Cudnn') and \
            (np.any(np.asarray(dilation) > 1) or group > 1):
        import nnabla_ext.cuda as nc
        major, minor, revision = map(int, nc.__cudnn_version__.split('.'))
        version = major * 1000 + minor * 100
        if version < 7200:
            pytest.skip(
                'channel_last dilated convolution does not work in CUDNN {}.'.format(version))
    base_axis = len(inshape) - len(kernel) - 1
    inmaps = inshape[base_axis]
    if channel_last:
        t = refs.ChannelLastToFirstTranspose(len(inshape), len(kernel))
        inshape = tuple(inshape[i] for i in t.inv_axes)
    rng = np.random.RandomState(seed)
    i = np.clip(rng.randn(*inshape).astype(np.float32), -0.8, 0.8)
    kshape = (outmaps,) + (inmaps // group,) + kernel
    if channel_last:
        t = refs.ChannelLastToFirstTranspose(len(kshape), len(kernel))
        kshape = tuple(kshape[i] for i in t.inv_axes)
    k = np.clip(rng.randn(*kshape).astype(np.float32), -0.8, 0.8)
    b = None
    if with_bias:
        b = np.clip(rng.randn(outmaps).astype(np.float32), -0.8, 0.8)
    inputs = [i, k, b]
    atol_half = 1.0 if inmaps > 64 else 1e-1
    func_args = [base_axis, pad, stride, dilation, group, channel_last]
    # Convolution
    backward_function_tester(rng, F.convolution, inputs, func_args=func_args,
                             atol_f=atol_f, atol_accum=atol_accum, dstep=dstep,
                             ctx=ctx)

    # DataGrad
    df, y = grad_function_forward_function_output(ConvolutionDataGrad,
                                                  F.convolution, ctx, inputs,
                                                  *func_args)
    df.xshape = i.shape
    ginputs = [rng.randn(*y.shape), k]
    backward_function_tester(rng, df, ginputs, func_args=[],
                             atol_f=atol_f, atol_b=atol_b, atol_accum=atol_accum,
                             dstep=dstep, ctx=ctx, non_accum_check=non_accum_check)

    # FilterGrad
    df, y = grad_function_forward_function_output(ConvolutionFilterGrad,
                                                  F.convolution, ctx, inputs,
                                                  *func_args)
    df.wshape = k.shape
    ginputs = [rng.randn(*y.shape), i]
    backward_function_tester(rng, df, ginputs, func_args=[],
                             atol_f=atol_f, atol_b=atol_b, atol_accum=atol_accum,
                             dstep=dstep, ctx=ctx, non_accum_check=non_accum_check)