def test_variable_arithmetic_scalar_ops(seed, op):
    rng = np.random.RandomState(seed)
    vx = nn.Variable.from_numpy_array(rng.randn(2, 3, 4).astype(np.float32))
    a = rng.randn()
    if op == "**":
        vx.d += - vx.d.min() + 1.0
    with nn.auto_forward():
        vz = eval("vx {0} a".format(op))
    ref_z = eval("vx.d {0} a".format(op))
    assert_allclose(ref_z, vz.d)

    # Inplace test
    with nn.auto_forward():
        # Make function reference count of `vx` to 1.
        vx = nn.functions.identity(vx)
        vx_bak = vx
        if op == "+":
            vx += a
        elif op == "-":
            vx -= a
        elif op == "*":
            vx *= a
        elif op == "/":
            vx /= a
        elif op == "**":
            vx **= a
    assert_allclose(vx.d, vz.d)
    assert vx is not vx_bak

def test_random_choice_with_replacement(ctx, func_name, seed):
    trials = 1000000
    x = nn.Variable([100], need_grad=True)
    x.d = np.random.random(x.size).astype(np.float32)
    w = nn.Variable([x.size], need_grad=True)
    w.d = np.random.randint(1, 100, w.size)
    with nn.context_scope(ctx), nn.auto_forward(True):
        y = F.random_choice(x, w, shape=[trials], replace=True, seed=seed)
    hist_nn, _ = np.histogram(y.d)
    hist_np, _ = np.histogram(
        np.random.choice(x.d, trials, True, w.d / w.d.sum()))
    assert np.allclose(hist_nn / trials, hist_np / trials, atol=1e-2)
    x.g = w.g = 0
    y.backward()
    assert np.allclose(x.g / trials, w.d / w.d.sum(), atol=1e-2)
    assert np.allclose(w.g / trials, w.d / w.d.sum(), atol=1e-2)

    x = nn.Variable.from_numpy_array(np.array([[1, 2, 3], [-1, -2, -3]]))
    w = nn.Variable.from_numpy_array(np.array([[1, 1, 1], [10, 10, 10]]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.random_choice(x, w, shape=(10,), replace=True, seed=seed)
    assert y.shape == (2, 10) and np.all(y.d[0] > 0) and np.all(y.d[1] < 0)

    # Deterministic case: one-hot weights route all draws (and gradients)
    # to a single index per row.
    x = nn.Variable((3, 3), need_grad=True)
    w = nn.Variable((3, 3), need_grad=True)
    w.d = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    with nn.context_scope(ctx), nn.auto_forward(True):
        y = F.random_choice(x, w, shape=[10], replace=True, seed=seed)
    x.g = w.g = 0
    y.backward(1)
    assert np.all(x.g == np.array([[10, 0, 0], [0, 10, 0], [0, 0, 10]]))
    assert np.all(w.g == np.array([[10, 0, 0], [0, 10, 0], [0, 0, 10]]))

def test_bool_scatter_inplace(seed, ctx, func_name, gshape, mask_shape):
    from nbla_test_utils import inplace_function_test_helper
    rng = np.random.RandomState(seed)
    gdata0 = rng.randn(*gshape).astype(np.float32)
    mask = rng.randint(0, 2, size=mask_shape)
    sdata = gdata0[mask.astype(bool)]
    gdata1 = rng.randn(*gshape).astype(np.float32)

    v_sdata = nn.Variable.from_numpy_array(sdata).apply(need_grad=True)
    v_mask = nn.Variable.from_numpy_array(mask)
    v_gdata1 = nn.Variable.from_numpy_array(gdata1).apply(need_grad=True)

    with nn.auto_forward():
        v_gdata2 = F.bool_scatter(v_sdata, v_mask, v_gdata1)

    # inplace check
    np.testing.assert_allclose(
        v_gdata2.d, v_gdata1.d,
        err_msg="F.bool_scatter(inplace) is not inplaced.")

    # ref check
    gdata2 = ref_bool_scatter_inplace(sdata, mask, gdata1)
    np.testing.assert_allclose(
        v_gdata2.d, gdata2,
        err_msg="F.bool_scatter(inplace) fails.")

    # backward wrt inplaced variable (wrt sdata is checked in the not-inplaced case)
    egrad = rng.randn(*gdata2.shape)
    mask = mask if mask.shape == gdata1.shape else \
        mask.reshape(mask.shape + (1,) * (gdata1.ndim - mask.ndim))
    ref_grad = egrad * (1 - mask)
    v_gdata1.grad.fill(0)
    v_gdata2.backward(egrad)
    np.testing.assert_allclose(
        v_gdata1.g, ref_grad,
        err_msg="F.bool_scatter(inplace) backward wrt inplace data fails.")

    bgrad = rng.randn(*gdata1.shape)
    v_gdata1.g = bgrad
    v_gdata2.backward(egrad)
    np.testing.assert_allclose(
        v_gdata1.g - bgrad, ref_grad, atol=1e-6,
        err_msg="F.bool_scatter(inplace) backward (accum) wrt inplace data fails.")

    # nn.grad (wrt sdata is checked in the not-inplaced case)
    with nn.auto_forward():
        d_gdata1 = nn.grad([v_gdata2], [v_gdata1], grad_outputs=[egrad])
    np.testing.assert_allclose(
        d_gdata1[0].d, ref_grad, atol=1e-6,
        err_msg="nn.grad (F.bool_scatter(inplace)) wrt inplace data fails.")

def test_fused_batch_normalization_forward_backward(
        seed, axis, decay_rate, eps, nonlinearity, output_stat, add, ctx,
        func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs(rng, axis, add))
    axes = [axis]
    batch_stat = True
    function_tester(rng, F.fused_batch_normalization,
                    ref_fused_batch_normalization, inputs,
                    ref_grad=ref_grad_fused_batch_normalization,
                    func_args=[axes, decay_rate, eps, batch_stat,
                               nonlinearity, output_stat],
                    backward=[True, True, True, False, False, add],
                    ctx=ctx, func_name=func_name, dstep=1e-2, atol_b=1e-2)

    # Check if running mean and var works.
    vinputs = []
    for i in inputs:
        if i is None:
            vinputs.append(None)
            continue
        vinputs.append(nn.Variable.from_numpy_array(i, need_grad=True))
    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y = ref_fused_batch_normalization(
            *(inputs + [axes, decay_rate, eps, batch_stat, nonlinearity,
                        output_stat]))
        with nn.context_scope(ctx), nn.auto_forward():
            y = F.fused_batch_normalization(
                *(vinputs + [axes, decay_rate, eps, batch_stat, nonlinearity,
                             output_stat]))
        assert np.allclose(vinputs[3].d, inputs[3])
        assert np.allclose(vinputs[4].d, inputs[4], atol=1e-3)

    # Check if global stat mode works
    batch_stat = False
    if output_stat:
        return
    ref_y = ref_fused_batch_normalization(
        *(inputs + [axes, decay_rate, eps, batch_stat, nonlinearity,
                    output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.fused_batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, nonlinearity,
                         output_stat]))
    assert np.allclose(ref_y, y.d, atol=1e-6)

def test_dense(self):
    x = nn.Variable((1, 2))
    x.d = [[0, 1]]
    with nn.parameter_scope("dense"), nn.auto_forward():
        output = dense(x, 3)
        self.assertTrue(np.all(nn.get_parameters()["affine/b"].d == 0))
    with nn.parameter_scope("dense"), nn.auto_forward():
        output_ref = F.tanh(PF.affine(x, 3))
    self.assertTrue(np.allclose(output.d, output_ref.d))

def test_lstm_dropout(self):
    xs = nn.Variable((1, 2, 1))
    mask = nn.Variable((1, 2))
    mask.d = [[1, 0]]
    with nn.parameter_scope("lstm_dropout"), nn.auto_forward():
        hs_ref, cs_ref = lstm(xs * 0.9, mask, 1)
    with nn.parameter_scope("lstm_dropout"), nn.auto_forward():
        hs, cs = lstm(xs, mask, 1, dropout=0.1, train=False)
    self.assertTrue(np.allclose(hs.d, hs_ref.d))
    self.assertTrue(np.allclose(cs.d, cs_ref.d))

def test_min_with_index(seed, ctx, func_name, inshape, axis, keepdims):
    x = np.random.RandomState(seed).randn(*inshape).astype(np.float32)
    x = nn.Variable.from_numpy_array(x)
    # with_index
    with nn.context_scope(ctx), nn.auto_forward(True):
        val, idx = F.min(x, axis, keepdims, with_index=True)
    assert_allclose(val.d, np.amin(x.d, axis, keepdims=keepdims))
    shape = [a for i, a in enumerate(x.d.shape) if i not in axis] + [-1]
    assert np.all(idx.d == x.d.reshape(*shape).argmin(-1).reshape(idx.d.shape))
    # only_index
    with nn.context_scope(ctx), nn.auto_forward(True):
        idx = F.min(x, axis, keepdims, only_index=True)
    shape = [a for i, a in enumerate(x.d.shape) if i not in axis] + [-1]
    assert np.all(idx.d == x.d.reshape(*shape).argmin(-1).reshape(idx.d.shape))

def test_batch_normalization_forward_backward(seed, axis, decay_rate, eps,
                                              output_stat, batch_stat, ctx,
                                              func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs(rng, axis))
    axes = [axis]
    if ctx.backend[0].split(':')[0] != 'cpu' and batch_stat == False:
        pytest.skip(
            "cuda and cudnn implementation for batch_stat==False is not implemented yet")
    else:
        function_tester(
            rng, F.batch_normalization, ref_batch_normalization, inputs,
            func_args=[axes, decay_rate, eps, batch_stat, output_stat],
            backward=[True, True, True, False, False],
            ctx=ctx, func_name=func_name, dstep=1e-2, atol_b=1e-2)

    # Check if running mean and var works.
    vinputs = []
    for i in inputs:
        vinputs.append(nn.Variable(i.shape, True))
        vinputs[-1].d = i
    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y = ref_batch_normalization(
            *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        with nn.context_scope(ctx), nn.auto_forward():
            y = F.batch_normalization(
                *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        assert np.allclose(vinputs[3].d, inputs[3], atol=1e-7)
        assert np.allclose(vinputs[4].d, inputs[4])

    # Check if global stat mode works
    batch_stat = False
    if output_stat:
        return
    ref_y = ref_batch_normalization(
        *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    assert np.allclose(ref_y, y.d, atol=1e-6)

def test_max_with_index(seed, ctx, func_name, inshape, axis, keepdims):
    x = np.random.RandomState(seed).randn(*inshape).astype(np.float32)
    x = nn.Variable.from_numpy_array(x)
    # with_index
    with nn.context_scope(ctx), nn.auto_forward(True):
        val, idx = F.max(x, axis, keepdims, with_index=True)
    assert_allclose(val.d, np.amax(x.d, axis, keepdims=keepdims))
    assert np.all(idx.d == argmax(x.d, axis).reshape(idx.d.shape))
    # only_index
    with nn.context_scope(ctx), nn.auto_forward(True):
        idx = F.max(x, axis, keepdims, only_index=True)
    assert np.all(idx.d == argmax(x.d, axis).reshape(idx.d.shape))

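# A minimal sketch of the argmax helper the test above assumes
# (illustrative only; the suite's own helper may differ). Like the
# inline check in test_min_with_index, it flattens the reduced axes
# into one trailing axis, which presumes they are contiguous at the
# end of the shape:
def argmax(x, axis):
    shape = [a for i, a in enumerate(x.shape) if i not in axis] + [-1]
    return x.reshape(*shape).argmax(-1)
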
def test_no_grad(auto_forward):
    import nnabla as nn
    import nnabla.functions as F
    import nnabla.parametric_functions as PF

    nn.clear_parameters()

    def network(x):
        def conv_bn_relu(h, i, name, skip=True):
            s = h
            imaps = h.shape[1]
            with nn.parameter_scope(name):
                h = PF.convolution(h, imaps, (3, 3), pad=(1, 1))
                h = PF.batch_normalization(h)
                h = F.relu(h)
            if not skip:
                return F.concatenate(*[h, s], axis=1) if i % 2 == 0 else h + s
            h = F.split(h, axis=1)
            h = [h_.reshape(h_.shape[:1] + (1,) + h_.shape[1:]) for h_ in h]
            h = F.concatenate(*h, axis=1)
            return h

        h = x
        h = conv_bn_relu(h, 0, "first-conv", False)
        for i in range(10):
            h = conv_bn_relu(h, i, f"{i:00d}-conv")
        pred = F.tanh(h)
        return pred

    def assert_need_grad_false(f):
        for inp in f.inputs:
            assert inp.need_grad == False, "need_grad must be false"
        for out in f.outputs:
            assert out.need_grad == False, "need_grad must be false"

    x = nn.Variable.from_numpy_array(np.random.randn(4, 16, 32, 32)) \
        .apply(need_grad=False) \
        .apply(persistent=True)

    with nn.auto_forward(auto_forward):
        y0 = network(x)
    if not auto_forward:
        y0.forward(clear_no_need_grad=True)

    with nn.auto_forward(auto_forward), nn.no_grad():
        y1 = network(x)
    if not auto_forward:
        y1.forward(clear_no_need_grad=True)

    y1.visit(assert_need_grad_false)
    np.testing.assert_allclose(y0.d, y1.d)

def test_variable_arithmetic_unary_ops(seed, op):
    rng = np.random.RandomState(seed)
    vx = nn.Variable.from_numpy_array(rng.randn(2, 3, 4).astype(np.float32))
    with nn.auto_forward():
        vz = eval("{0} vx".format(op))
    ref_z = eval("{0} vx.d".format(op))
    assert np.allclose(ref_z, vz.d)

def test_reshape():
    v = nn.Variable([2, 3, 4], need_grad=True)
    grad = np.random.randn(*v.shape).astype(np.float32)
    v.g = grad
    v.d = np.random.randn(*v.shape)
    import nnabla.functions as F
    with nn.context_scope(nn.Context()), nn.auto_forward():
        v2 = F.identity(v)
        v2_s = v2.reshape((3, 4, 2))
        v3 = F.identity(v2_s)
    v3.backward(clear_buffer=False)
    assert np.all(v2_s.g.flat == v2.g.flat)
    assert np.all(v2_s.g == 1)
    v2.d = 1
    assert np.all(v2_s.d == 1)
    v2.g = 1.5
    assert np.all(v2_s.g == 1.5)

    # Check unlink
    v2_su = v2.reshape((3, 4, 2), unlink=True)
    assert v2_su.need_grad
    assert v2_su.parent is None
    v2_su.need_grad = False
    v2_su2 = v2_su.reshape((3, 4, 2), unlink=True)
    assert not v2_su2.need_grad
    assert v2_su2.parent is None

def ref_fused_batch_normalization(x, beta, gamma, rmean, rvar, z, axes,
                                  decay_rate, eps, batch_stat, nonlinearity,
                                  output_stat):
    with nn.context_scope(cpu_context):
        xvar = nn.Variable.from_numpy_array(x)
        betavar = nn.Variable.from_numpy_array(beta)
        gammavar = nn.Variable.from_numpy_array(gamma)
        rmeanvar = nn.Variable.from_numpy_array(rmean)
        rvarvar = nn.Variable.from_numpy_array(rvar)
        if z is not None:
            zvar = nn.Variable.from_numpy_array(z)
        with nn.auto_forward():
            bn = F.batch_normalization(xvar, betavar, gammavar, rmeanvar,
                                       rvarvar, axes, decay_rate, eps,
                                       batch_stat, output_stat)
            if z is None:
                if output_stat:
                    y = bn[0]
                else:
                    y = bn
            else:
                if output_stat:
                    y = F.add2(bn[0], zvar)
                else:
                    y = F.add2(bn, zvar)
            y = F.relu(y)
        rmean[:] = rmeanvar.d
        rvar[:] = rvarvar.d
    if output_stat:
        return y.d, bn[1].d, bn[2].d
    return y.d

def get_value(val, dtype=float, reduction=True):
    """
    Get a float value from nn.NdArray / nn.Variable / np.ndarray / float.
    """
    # Get NdArray from Variable
    if isinstance(val, nn.Variable):
        val = val.data

    # Get value as float
    if isinstance(val, nn.NdArray):
        assert not val.clear_called
        # Take the average if val has more than one element
        if reduction and val.size > 1:
            with nn.auto_forward(), nn.no_grad():
                val = F.mean(val)
        v = val.get_data("r")
    elif isinstance(val, np.ndarray):
        if reduction and val.size > 1:
            val = np.mean(val)
        v = val
    else:
        assert isinstance(val, (int, float, np.generic))
        v = val
    return dtype(v)

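# Illustrative usage sketch for get_value (an added example, not part of
# the original suite): multi-element inputs are reduced to their mean
# before the cast to `dtype`.
def test_get_value_usage():
    x = nn.Variable.from_numpy_array(
        np.array([1.0, 2.0, 3.0], dtype=np.float32))
    assert get_value(x) == 2.0                     # Variable -> mean of elements
    assert get_value(np.array([1.0, 3.0])) == 2.0  # ndarray -> np.mean
    assert get_value(0.5) == 0.5                   # plain floats pass through
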
def render(pose, intrinsic, mask_obj, conf):
    assert conf.height % conf.batch_height == 0, \
        f"conf.height ({conf.height}) % conf.batch_height ({conf.batch_height}) != 0"
    W, H = conf.width, conf.height
    bh = conf.batch_height

    xy = generate_all_pixels(W, H)
    xy = xy.reshape((1, H, W, 2))

    camloc = nn.Variable([1, 3])
    raydir = nn.Variable([1, bh * W, 3])
    with nn.auto_forward(False):
        color_pred = _render(camloc, raydir, conf).reshape((1, bh, W, 3))

    rimage = np.empty([1, H, W, 3])
    for h in tqdm(range(0, H, bh), desc="Rendering"):
        xy_h = xy[:, h:h + bh, :, :].reshape((1, bh * W, 2))
        raydir.d, camloc.d = generate_raydir_camloc(pose, intrinsic, xy_h)
        color_pred.forward(clear_buffer=True)
        rimage[0, h:h + bh, :, :] = color_pred.d.copy()

    rimage = rimage * mask_obj
    return rimage.transpose((0, 3, 1, 2))  # NCHW

def test_flipping(key):
    b, c, h, w = 8, 8, 16, 16
    x_np = np.random.rand(b, c, h, w)
    x_nn = nn.NdArray.from_numpy_array(x_np)
    with nn.auto_forward(True):
        assert np.allclose(eval("x_nn[{key}].data".format(key=key)),
                           eval("x_np[{key}]".format(key=key)))

def test_pack_and_unpack(total_length, enforce_sorted, batch_first, shapes,
                         seed, ctx, func_name):
    rng = np.random.RandomState(seed)
    sequences = [rng.randn(*shape).astype(np.float32) for shape in shapes]
    if not enforce_sorted:
        indices = rng.permutation(len(sequences))
        sequences = [sequences[i] for i in indices]
    sequences = [nn.Variable.from_numpy_array(s) for s in sequences]

    with nn.context_scope(ctx), nn.auto_forward():
        padded_sequence0 = rnn_utils.pad_sequence(sequences, batch_first)
        packed_sequence = rnn_utils.pack_sequence(sequences, batch_first,
                                                  enforce_sorted)
        padded_sequence, _ = rnn_utils.pad_packed_sequence(
            packed_sequence, batch_first, total_length)
        if total_length is not None:
            batch_sizes = packed_sequence.batch_sizes
            T = batch_sizes.shape[0]
            padded_sequence = padded_sequence[:, :T, ...] if batch_first else \
                padded_sequence[:T, :, ...]
    np.testing.assert_allclose(padded_sequence0.d, padded_sequence.d)

def test_1d_array_indexing():
    # 1d-tensor
    d = 16
    x_data = np.random.rand(d)
    x = nn.NdArray.from_numpy_array(x_data)
    with nn.auto_forward(True):
        x_data_key = x_data[1]
        x_key = x[1]
        assert np.allclose(x_key.data, x_data_key)

        x_data_key = x_data[:]
        x_key = x[:]
        assert np.allclose(x_key.data, x_data_key)

        x_data_key = x_data[3:8]
        x_key = x[3:8]
        assert np.allclose(x_key.data, x_data_key)

        x_data_key = x_data[3:16:3]
        x_key = x[3:16:3]
        assert np.allclose(x_key.data, x_data_key)

        x_data_key = x_data[...]
        x_key = x[...]
        assert np.allclose(x_key.data, x_data_key)

        x_data_key = x_data[np.newaxis]
        x_key = x[np.newaxis]
        assert np.allclose(x_key.data, x_data_key)

def test_graph_logreg(seed):
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4], need_grad=True)
    w = nn.Variable([12, 5], need_grad=True)
    b = nn.Variable([5], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    w.d = rng.randn(*w.shape)
    b.d = rng.randn(*b.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    with nn.auto_forward():
        z = F.affine(x, w, b, 1)
        l = F.softmax_cross_entropy(z, t, 1)
        L = F.mean(l)

    # Backprop
    # Diff should be initialized since they are always accumulated
    x.g = 0
    w.g = 0
    b.g = 0
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)

    inputs = [x, w, b]
    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert_allclose(ngrad, agrad, atol=1e-2)

def test_scalar_dot(seed, scalar, is_dynamic):
    rng = np.random.RandomState(seed)
    a1 = scalar
    a2 = rng.randn(3, 4, 5, 6).astype(np.float32)
    n = nn.NdArray.from_numpy_array(a2)
    v = nn.Variable.from_numpy_array(a2)
    ref = F.dot(a1, a2)

    ans1 = F.dot(a1, n)
    assert_allclose(ans1.data, ref)

    out1 = nn.NdArray((3, 4, 5, 6))
    F.dot(a1, n, out1)
    assert_allclose(out1.data, ref)

    with nn.auto_forward(is_dynamic):
        ans2 = F.dot(a1, v)
        if not is_dynamic:
            ans2.forward()
        assert_allclose(ans2.d, ref)

        out2 = nn.Variable((3, 4, 5, 6))
        F.dot(a1, v, out2)
        if not is_dynamic:
            out2.forward()
        assert_allclose(out2.d, ref)

def test_4d_array_indexing():
    # 4d-tensor
    b, c, h, w = 8, 16, 40, 40
    x_data = np.random.rand(b, c, h, w)
    x = nn.NdArray.from_numpy_array(x_data)
    with nn.auto_forward(True):
        x_data_key = x_data[:, :, 4:36, 4:36]
        x_key = x[:, :, 4:36, 4:36]
        assert np.allclose(x_key.data, x_data_key)

        x_data_key = x_data[:, 0, :, :]
        x_key = x[:, 0, :, :]
        assert np.allclose(x_key.data, x_data_key)

        x_data_key = x_data[3, ...]
        x_key = x[3, ...]
        assert np.allclose(x_key.data, x_data_key)

        x_data_key = x_data[3, 0, :, :]
        x_key = x[3, 0, :, :]
        assert np.allclose(x_key.data, x_data_key)

        x_data_key = x_data[...]
        x_key = x[...]
        assert np.allclose(x_key.data, x_data_key)

def test_random_shift_forward_backward(seed, inshape, shifts, border_mode,
                                       constant_value, ctx, func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(*inshape).astype(np.float32)]
    i = nn.Variable(inputs[0].shape, need_grad=True)
    i.d = inputs[0]
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.random_shift(i, shifts, border_mode, constant_value, 0, seed)

    result_shifts = (0, 0, 0)
    max_correl = 0
    for shift_amount in itertools.product(*map(
            tuple,
            map(lambda x: range(*x), [(-2, 3) for _ in range(len(inshape))]))):
        r = scipy_shift(inputs[0], shift_amount, mode=border_mode,
                        cval=constant_value)
        correl_and_p = pearsonr(o.d.flatten(), r.flatten())
        if correl_and_p[0] > max_correl:
            result_shifts = shift_amount
            max_correl = correl_and_p[0]
    ref = scipy_shift(inputs[0], result_shifts, mode=border_mode,
                      cval=constant_value)
    if shifts is None:
        shifts = (0,) * len(inputs[0].shape)
    for result, shift_range in zip(result_shifts, shifts):
        assert abs(result) <= shift_range

    assert_allclose(o.d, ref)
    assert o.parent.name == func_name

    # Skipping Backward check
    g = np.random.randn(*i.shape)
    i.g = g
    o_grad = np.random.randn(*o.shape)
    o.g = o_grad
    o.parent.backward([i], [o])
    ref_grad = i.g.copy() - g

    # Check accum=False with NaN gradient
    i.g = np.float32('nan')
    o.parent.backward([i], [o], [False])
    assert not np.any(np.isnan(i.g))

    # Check if accum option works
    i.g[...] = 1
    o.g = o_grad
    o.parent.backward([i], [o], [False])
    assert_allclose(i.g, ref_grad, atol=1e-6)

    # Check if need_grad works
    i.g[...] = 0
    i.need_grad = False
    o_grad = rng.randn(*i.shape).astype(i.data.dtype)
    o.backward(o_grad)
    assert np.all(i.g == 0)

def test_ndarray_arithmetic_matmul_ops(seed, shape, is_dynamic):
    rng = np.random.RandomState(seed)
    a1 = rng.randn(*shape[0]).astype(np.float32)
    a2 = rng.randn(*shape[1]).astype(np.float32)
    n1 = nn.NdArray.from_numpy_array(a1)
    n2 = nn.NdArray.from_numpy_array(a2)
    v1 = nn.Variable.from_numpy_array(a1)
    v2 = nn.Variable.from_numpy_array(a2)
    ref = a1 @ a2

    # NdArray @ NdArray
    ans1 = n1 @ n2
    assert_allclose(ref, ans1.data, atol=1e-5)

    # NdArray @ Variable
    ans2 = n1 @ v2
    assert_allclose(ref, ans2.data, atol=1e-5)

    # Variable @ NdArray
    ans3 = v1 @ n2
    assert_allclose(ref, ans3.data, atol=1e-5)

    # Variable @ Variable
    with nn.auto_forward(is_dynamic):
        ans4 = v1 @ v2
        if not is_dynamic:
            ans4.forward()
    assert_allclose(ref, ans4.d, atol=1e-5)

def test_clip_by_value_forward(seed, shape, dtype):
    def convert(value):
        converter = dtype if dtype in (float, np.array) \
            else dtype.from_numpy_array
        return converter(value)

    rng = np.random.RandomState(seed)
    x_data = rng.randn(*shape)
    x = nn.Variable.from_numpy_array(x_data)
    if dtype is float:
        min_data = rng.randn()
        max_data = rng.randn()
    else:
        min_data = rng.randn(*shape)
        max_data = rng.randn(*shape)
    min_ = convert(min_data)
    max_ = convert(max_data)

    if dtype is not np.array:
        with nn.auto_forward(True):
            y = F.clip_by_value(x, min_, max_)
        y_ref = ref_clip_by_value(x_data, min_data, max_data)
        if dtype in (nn.Variable, float):
            assert_allclose(y.d, y_ref)
        elif dtype is nn.NdArray:
            assert_allclose(y.data, y_ref)
    else:
        with pytest.raises(TypeError):
            y = F.clip_by_value(x, min_data, max_data)

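# A minimal sketch of the reference helper assumed by the test above
# (illustrative; the repository's own ref_clip_by_value may differ).
# clip_by_value clamps x elementwise into [min_, max_], where min_ and
# max_ may be scalars or arrays broadcastable to x:
def ref_clip_by_value(x, min_, max_):
    return np.minimum(np.maximum(x, min_), max_)
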
def ref_grad_binary_connect_convolution(x, w, wb, b, dy, base_axis, pad,
                                        stride, dilation, group,
                                        quantize_zero_to, **kw):
    # Set variables
    vx = nn.Variable(x.shape, need_grad=True)
    vx.d = x
    vx.grad.zero()
    vw = nn.Variable(w.shape, need_grad=True)
    vw.d = binarize(w, quantize_zero_to)
    vw.grad.zero()
    vb = None
    if b is not None:
        vb = nn.Variable(b.shape, need_grad=True)
        vb.d = b
        vb.grad.zero()

    # Execute binarized forward and back prop.
    with nn.auto_forward():
        vy = F.convolution(vx, vw, vb, base_axis, pad, stride, dilation, group)
    vy.backward(dy)

    # Return grads
    if b is None:
        return np.concatenate([vx.g.flat, vw.g.flat])
    return np.concatenate([vx.g.flat, vw.g.flat, vb.g.flat])

def sample_from_pretrained_controller(args):
    """
    Experimental implementation.
    """
    assert args.num_sampling > 0, "num_sampling must be > 0."
    path = os.path.join(args.model_save_path, 'controller_params.h5')
    assert os.path.exists(path), "controller's weights seem to be missing!"
    nn.parameter.load_parameters(path)

    for i in range(args.num_sampling):
        output_line = " Sampled Architecture {} / {} ".format(
            (i + 1), args.num_sampling)
        print("\n{0:-^80s}\n".format(output_line))
        with nn.auto_forward():
            arc_seq, _, _, _ = sample_from_controller(args)
        sample_arch = list()
        for arc in arc_seq:
            sample_arch.extend(arc.tolist())
        show_arch(sample_arch)
        filename = "sampled_macro_arch_{}.npy".format(i)
        np.save(filename, np.array(sample_arch))
        print("When you want to train the sampled network from scratch,\n"
              "type something like "
              "'python macro_retrain.py <OPTION> --recommended-arch {}'"
              .format(filename))

def predict(x):
    with nn.auto_forward():
        x = x.reshape((1, sentence_length_source))
        enc_input = nn.Variable.from_numpy_array(x)
        enc_input = time_distributed(PF.embed)(enc_input, vocab_size_source,
                                               embedding_size,
                                               name='enc_embeddings')
        # encoder
        with nn.parameter_scope('encoder'):
            output, c, h = LSTMEncoder(enc_input, hidden,
                                       return_sequences=True,
                                       return_state=True)
        # decoder
        params = [nn.get_parameters()['dec_embeddings/embed/W'],
                  nn.get_parameters()['output/affine/W'],
                  nn.get_parameters()['output/affine/b']]
        ret = LSTMAttentionDecoder(encoder_output=output,
                                   initial_state=(c, h),
                                   inference_params=params,
                                   name='decoder')
    return ret

def test_decoder(self):
    target_action = nn.Variable((1, 2, 3))
    target_action.d = 0
    target_action_type = nn.Variable((1, 2, 3))
    target_action_type.d = 0
    target_action_type.d[0, 0, 2] = 1
    target_node_type = nn.Variable((1, 2))
    target_node_type.d = 0
    target_parent_rule = nn.Variable((1, 2))
    target_parent_rule.d = 0
    target_parent_rule.d[0, 1] = -1
    target_parent_index = nn.Variable((1, 2))
    target_parent_index.d = 1
    query_embed = nn.Variable((1, 3, 1))
    query_embed.d = 1
    query_embed_mask = nn.Variable((1, 3))
    query_embed_mask.d = [[1, 1, 0]]
    with nn.parameter_scope("decoder"), nn.auto_forward():
        _, hs, cs, ctx, mask, hist = decoder(
            target_action, target_action_type, target_node_type,
            target_parent_rule, target_parent_index, query_embed,
            query_embed_mask, 2, 2, 2, 128, 64, 256, 50)
    self.assertEqual(hs.shape, (1, 2, 256))
    self.assertEqual(cs.shape, (1, 2, 256))
    self.assertEqual(ctx.shape, (1, 2, 1))
    self.assertEqual(mask.shape, (1, 2))
    self.assertEqual(hist.shape, (1, 3, 256))
    self.assertTrue(np.all(mask.d == [[1, 0]]))

def test_mean_subtraction_forward_backward(seed, inshape, base_axis, ctx,
                                           func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    mean_shape = inshape[base_axis:] if base_axis >= 0 \
        else inshape[base_axis + len(inshape):]
    inputs = [
        np.array(rng.randn(*inshape).astype(np.float32)),
        np.zeros(mean_shape),
        np.array([1000])
    ]
    batch_stat = True
    function_tester(rng, F.mean_subtraction, ref_mean_subtraction, inputs,
                    func_args=[base_axis, batch_stat],
                    ctx=ctx, func_name=func_name, dstep=1e-2,
                    backward=[True, False, False], atol_b=1e-2)

    # Check if running mean works.
    vinputs = []
    for input in inputs:
        vinputs.append(nn.Variable(input.shape, True))
        vinputs[-1].d = input
    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y, rmean = ref_mean_subtraction(
            *(inputs + [base_axis, batch_stat]))
        with nn.auto_forward():
            y = F.mean_subtraction(*(vinputs + [base_axis, batch_stat]))
        assert_allclose(vinputs[1].d, inputs[1])

    # Check if global stat mode works
    batch_stat = False
    ref_y = ref_mean_subtraction(*(inputs + [base_axis, batch_stat]))
    with nn.auto_forward():
        y = F.mean_subtraction(*(vinputs + [base_axis, batch_stat]))
    assert_allclose(ref_y, y.d)

def test_clip_by_norm_forward(seed, shape, clip_norm, axis):
    rng = np.random.RandomState(seed)
    x_data = rng.randn(*shape)
    x = nn.Variable.from_numpy_array(x_data)
    with nn.auto_forward(True):
        y = F.clip_by_norm(x, clip_norm, axis)
    y_ref = ref_clip_by_norm(x_data, clip_norm, axis=axis)
    assert np.allclose(y.d, y_ref)

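# A minimal sketch of the reference helper assumed by the test above
# (illustrative; the repository's own ref_clip_by_norm may differ).
# clip_by_norm rescales x so that its L2 norm along `axis` does not
# exceed clip_norm: y = clip_norm * x / max(||x||_2, clip_norm).
def ref_clip_by_norm(x, clip_norm, axis=None):
    norm = np.sqrt(np.sum(x ** 2, axis=axis, keepdims=True))
    return clip_norm * x / np.maximum(norm, clip_norm)
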
def test_arithmetic_unary_ops(seed, op):
    rng = np.random.RandomState(seed)
    vx = nn.Variable([2, 3, 4])
    vx.d = rng.randn(*vx.shape).astype(np.float32)
    with nn.auto_forward():
        vz = eval("{0} vx".format(op))
    ref_z = eval("{0} vx.d".format(op))
    assert np.allclose(ref_z, vz.d)

def test_image_augmentation_forward(seed, ctx, func_name):
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(16, 3, 8, 8).astype(np.float32)]
    i = nn.Variable(inputs[0].shape)
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.image_augmentation(i)
    assert o.d.shape == inputs[0].shape

    shape = (3, 5, 8)
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.image_augmentation(i, shape=shape, pad=(2, 2),
                                 min_scale=0.8, max_scale=1.2, angle=0.2,
                                 aspect_ratio=1.1, distortion=0.1,
                                 flip_lr=True, flip_ud=False,
                                 brightness=0.1, brightness_each=True,
                                 contrast=1.1, contrast_center=0.5,
                                 contrast_each=True, noise=0.1, seed=0)
    assert o.d.shape == (inputs[0].shape[0],) + shape

def test_one_hot_forward(seed, inshape, shape, ctx, func_name):
    rng = np.random.RandomState(seed)
    # Input
    input = rng.randint(0, shape[0], size=inshape)
    vinput = nn.Variable(input.shape, need_grad=False)
    vinput.d = input
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.one_hot(vinput, shape)
    r = ref_one_hot(input, shape)
    assert np.allclose(o.d, r)
    assert func_name == o.parent.name

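# A minimal sketch of the reference helper assumed by the test above
# (illustrative; the repository's own ref_one_hot may differ). For an
# integer index array of shape (..., len(shape)), it writes a 1 at the
# indexed position of each trailing one-hot block:
def ref_one_hot(indices, shape):
    out = np.zeros(indices.shape[:-1] + tuple(shape), dtype=np.float32)
    for pos in np.ndindex(*indices.shape[:-1]):
        out[pos + tuple(indices[pos])] = 1
    return out
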
def test_arithmetic_scalar_rops(seed, op):
    rng = np.random.RandomState(seed)
    vx = nn.Variable([2, 3, 4])
    vx.d = rng.randn(*vx.shape).astype(np.float32)
    a = rng.randn()
    if op == "**":
        a = np.abs(a)
    with nn.auto_forward():
        vz = eval("a {0} vx".format(op))
    ref_z = eval("a {0} vx.d".format(op))
    assert np.allclose(ref_z, vz.d)

def test_arithmetic_ops2(seed, op):
    rng = np.random.RandomState(seed)
    vx = nn.Variable([2, 3, 4])
    vy = nn.Variable([2, 3, 4])
    vx.d = rng.randn(*vx.shape).astype(np.float32)
    vy.d = rng.randn(*vy.shape).astype(np.float32)
    if op == "**":
        # Shift vx into the positive range so the power is well-defined.
        vx.d += - vx.d.min() + 1.0
    with nn.auto_forward():
        vz = eval("vx {0} vy".format(op))
    ref_z = eval("vx.d {0} vy.d".format(op))
    assert np.allclose(ref_z, vz.d)

def test_batch_normalization_forward_backward(seed, axis, decay_rate, eps,
                                              output_stat, ctx, func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs(rng, axis))
    axes = [axis]
    batch_stat = True
    function_tester(rng, F.batch_normalization, ref_batch_normalization,
                    inputs,
                    func_args=[axes, decay_rate, eps, batch_stat, output_stat],
                    backward=[True, True, True, False, False],
                    ctx=ctx, func_name=func_name, dstep=1e-2, atol_b=1e-2)

    # Check if running mean and var works.
    vinputs = []
    for i in inputs:
        vinputs.append(nn.Variable(i.shape, True))
        vinputs[-1].d = i
    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y = ref_batch_normalization(
            *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        with nn.context_scope(ctx), nn.auto_forward():
            y = F.batch_normalization(
                *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        assert np.allclose(vinputs[3].d, inputs[3])
        assert np.allclose(vinputs[4].d, inputs[4], atol=1e-3)

    # Check if global stat mode works
    batch_stat = False
    if output_stat:
        return
    ref_y = ref_batch_normalization(
        *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    assert np.allclose(ref_y, y.d, atol=1e-6)

def test_random_crop_forward_backward(seed, inshape, shape, ctx, func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(*inshape).astype(np.float32)]
    i = nn.Variable(inputs[0].shape, need_grad=True)
    i.d = inputs[0]
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.random_crop(i, shape, 0, seed)

    if shape is not None:
        max_correl = 0
        possible_crop_range = [
            input - output for output, input in zip(shape, inshape)]
        for crop_pos in itertools.product(*map(
                tuple,
                map(lambda x: range(*x),
                    [(0, r + 1) for r in possible_crop_range]))):
            r = inputs[0][crop_pos[0]:crop_pos[0] + shape[0],
                          crop_pos[1]:crop_pos[1] + shape[1],
                          crop_pos[2]:crop_pos[2] + shape[2]]
            assert o.d.shape == r.shape
            correl_and_p = pearsonr(o.d.flatten(), r.flatten())
            if correl_and_p[0] > max_correl:
                max_correl = correl_and_p[0]
    else:
        max_correl = pearsonr(o.d.flatten(), inputs[0].flatten())[0]

    assert max_correl == 1.0
    assert o.parent.name == func_name

    # Skipping Backward check
    g = np.random.randn(*i.shape)
    i.g = g
    o_grad = np.random.randn(*o.shape)
    o.g = o_grad
    o.parent.backward([i], [o])
    ref_grad = i.g.copy() - g

    # Check accum=False with NaN gradient
    i.g = np.float32('nan')
    o.parent.backward([i], [o], [False])
    assert not np.any(np.isnan(i.g))

    # Check if accum option works
    i.g[...] = 1
    o.g = o_grad
    o.parent.backward([i], [o], [False])
    assert np.allclose(i.g, ref_grad, atol=1e-6)

    # Check if need_grad works
    i.g[...] = 0
    i.need_grad = False
    o_diff = rng.randn(*o.shape).astype(i.d.dtype)
    o.backward(o_diff)
    assert np.all(i.g == 0)

def test_random_shift_forward_backward(seed, inshape, shifts, border_mode,
                                       ctx, func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(*inshape).astype(np.float32)]
    i = nn.Variable(inputs[0].shape, need_grad=True)
    i.d = inputs[0]
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.random_shift(i, shifts, border_mode, 0, seed)

    result_shifts = (0, 0, 0)
    max_correl = 0
    for shift_amount in itertools.product(*map(
            tuple,
            map(lambda x: range(*x), [(-2, 3) for _ in range(len(inshape))]))):
        r = scipy_shift(inputs[0], shift_amount, mode=border_mode)
        correl_and_p = pearsonr(o.d.flatten(), r.flatten())
        if correl_and_p[0] > max_correl:
            result_shifts = shift_amount
            max_correl = correl_and_p[0]
    ref = scipy_shift(inputs[0], result_shifts, mode=border_mode)
    if shifts is None:
        shifts = (0,) * len(inputs[0].shape)
    for result, shift_range in zip(result_shifts, shifts):
        assert abs(result) <= shift_range

    assert np.allclose(o.d, ref)
    assert o.parent.name == func_name

    # Skipping Backward check
    g = np.random.randn(*i.shape)
    i.g = g
    o_grad = np.random.randn(*o.shape)
    o.g = o_grad
    o.parent.backward([i], [o])
    ref_grad = i.g.copy() - g

    # Check accum=False with NaN gradient
    i.g = np.float32('nan')
    o.parent.backward([i], [o], [False])
    assert not np.any(np.isnan(i.g))

    # Check if accum option works
    i.g[...] = 1
    o.g = o_grad
    o.parent.backward([i], [o], [False])
    assert np.allclose(i.g, ref_grad, atol=1e-6)

    # Check if need_grad works
    i.g[...] = 0
    i.need_grad = False
    o_grad = rng.randn(*i.shape).astype(i.data.dtype)
    o.backward(o_grad)
    assert np.all(i.g == 0)

def test_mean_subtraction_forward_backward(seed, inshape, base_axis, ctx,
                                           func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    mean_shape = inshape[base_axis:]
    inputs = [np.array(rng.randn(*inshape).astype(np.float32)),
              np.zeros(mean_shape),
              np.array([1000])]
    batch_stat = True
    function_tester(rng, F.mean_subtraction, ref_mean_subtraction, inputs,
                    func_args=[base_axis, batch_stat],
                    ctx=ctx, func_name=func_name, dstep=1e-2,
                    backward=[True, False, False], atol_b=1e-2)

    # Check if running mean works.
    vinputs = []
    for input in inputs:
        vinputs.append(nn.Variable(input.shape, True))
        vinputs[-1].d = input
    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y, rmean = ref_mean_subtraction(
            *(inputs + [base_axis, batch_stat]))
        with nn.auto_forward():
            y = F.mean_subtraction(*(vinputs + [base_axis, batch_stat]))
        assert np.allclose(vinputs[1].d, inputs[1])

    # Check if global stat mode works
    batch_stat = False
    ref_y = ref_mean_subtraction(*(inputs + [base_axis, batch_stat]))
    with nn.auto_forward():
        y = F.mean_subtraction(*(vinputs + [base_axis, batch_stat]))
    assert np.allclose(ref_y, y.d)

def test_unlinked():
    v = nn.Variable([2, 3, 4], need_grad=True)
    grad = np.random.randn(*v.shape).astype(np.float32)
    v.g = grad
    v.d = np.random.randn(*v.shape)
    import nnabla.functions as F
    with nn.context_scope(nn.Context()), nn.auto_forward():
        v2 = F.identity(v)
        v2_u = v2.unlinked()
        v3 = F.identity(v2_u)
    v2_u.grad.zero()
    v2_g = v2_u.g.copy()
    v3.backward(clear_buffer=False)
    assert type(v2_u) == type(v2)
    assert np.all(v.g == grad)
    assert np.all(v2_u.g == v2.g)
    assert np.all(v2_u.g == v2_g + 1)

def test_dropout_forward_backward(p, seed, ctx, func_name):
    from nbla_test_utils import cap_ignore_region, function_tester
    rng = np.random.RandomState(seed)
    inputs = [
        cap_ignore_region(
            rng.randn(2, 3, 4).astype(np.float32) * 2,
            (-1e-3, 1e-3))]  # Ensure there is no zero.
    i = nn.Variable(inputs[0].shape, need_grad=True)
    i.d = inputs[0]
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.dropout(i, p)
    scale = 1. / (1. - p)
    mask = o.d != 0
    assert np.allclose(o.d, i.d * mask * scale)
    assert o.parent.name == func_name

    # NNabla backward
    orig_grad = rng.randn(*i.shape).astype(i.data.dtype)
    i.g[...] = orig_grad
    o_grad = rng.randn(*i.shape).astype(i.data.dtype)
    o.backward(o_grad)
    ref_grad = o_grad * mask * scale

    # Verify
    assert np.allclose(i.g, orig_grad + ref_grad)

    # Check if accum option works
    i.g[...] = 1
    o.g = o_grad
    o.parent.backward([i], [o], [False])
    assert np.allclose(i.g, ref_grad)

    # Check accum=False with NaN gradient
    i.g = np.float32('nan')
    o.parent.backward([i], [o], [False])
    assert not np.any(np.isnan(i.g))

    # Check if need_grad works
    i.g[...] = 0
    i.need_grad = False
    o.backward(o_grad)
    assert np.all(i.g == 0)

def test_random_flip_forward_backward(seed, axes, ctx, func_name):
    from nbla_test_utils import cap_ignore_region, function_tester
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(2, 3, 4).astype(np.float32)]
    i = nn.Variable(inputs[0].shape, need_grad=True)
    i.d = inputs[0]
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.random_flip(i, axes, 0, seed)
    flip_close = np.allclose(o.d, ref_flip(inputs[0], axes))
    assert flip_close or (not flip_close and np.allclose(o.d, i.d))
    assert o.parent.name == func_name

    # NNabla backward
    orig_grad = rng.randn(*i.shape).astype(i.data.dtype)
    i.g[...] = orig_grad
    o_grad = rng.randn(*i.shape).astype(i.data.dtype)
    o.g = o_grad
    o.parent.backward([i], [o])

    # Verify
    if flip_close:
        ref_grad = ref_flip(o_grad, axes)
    else:
        ref_grad = o_grad
    assert np.allclose(i.g, orig_grad + ref_grad)

    # Check if accum option works
    i.g[...] = 1
    o.g = o_grad
    o.parent.backward([i], [o], [False])
    assert np.allclose(i.g, ref_grad)

    # Check accum=False with NaN gradient
    i.g = np.float32('nan')
    o.parent.backward([i], [o], [False])
    assert not np.any(np.isnan(i.g))

    # Check if need_grad works
    i.g[...] = 0
    i.need_grad = False
    o.backward(o_grad)
    assert np.all(i.g == 0)

def test_graph_unlink_backward(seed):
    rng = np.random.RandomState(seed)
    x0 = nn.Variable([2, 4], need_grad=True)
    x1 = nn.Variable([2, 4], need_grad=True)
    x0.d = rng.randn(*x0.shape)
    x1.d = rng.randn(*x1.shape)
    x0.grad.zero()
    x1.grad.zero()
    with nn.auto_forward():
        with nn.parameter_scope("fc0"):
            h0 = PF.affine(x0, 2)
        with nn.parameter_scope("fc1"):
            h1 = PF.affine(x1, 2)
        h0.need_grad = False
        h = h0 + h1
        with nn.parameter_scope("fc"):
            y = PF.affine(h, 1)
    y.backward(clear_buffer=True)
    assert np.all(x0.g == 0)
    assert not np.all(x1.g == 0)

def ref_grad_binary_connect_convolution(x, w, wb, b, dy, base_axis, pad,
                                        stride, dilation, group):
    # Set variables
    vx = nn.Variable(x.shape, need_grad=True)
    vx.d = x
    vx.grad.zero()
    vw = nn.Variable(w.shape, need_grad=True)
    vw.d = binarize(w)
    vw.grad.zero()
    vb = None
    if b is not None:
        vb = nn.Variable(b.shape, need_grad=True)
        vb.d = b
        vb.grad.zero()

    # Execute binarized forward and back prop.
    with nn.auto_forward():
        vy = F.convolution(vx, vw, vb, base_axis, pad, stride, dilation, group)
    vy.backward(dy)

    # Return grads
    if b is None:
        return np.concatenate([vx.g.flat, vw.g.flat])
    return np.concatenate([vx.g.flat, vw.g.flat, vb.g.flat])

def function_tester(rng, func, ref_func, inputs,
                    func_args=[], func_kwargs={},
                    atol_f=1e-6, atol_b=1e-3, dstep=1e-3, backward=None,
                    ctx=None, func_name=None, ref_grad=None):
    """
    Automatic testing of the forward/backward pass of `func` by comparing it
    to the reference implementation in `ref_func`.

    Syntax of `ref_func`: inputs, parameters
    Syntax of `ref_grad`: inputs, output grads, parameters
    """
    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True for _ in inputs]

    # Create Variables
    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinputs += [nn.Variable(i.shape, need_grad=b)]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs
    vinputs = create_variables(inputs, backward)

    # Checking forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = func(*(vinputs + func_args), **func_kwargs)
    rinputs = copy.deepcopy(inputs)  # inputs for ref_func
    refs = ref_func(*(rinputs + func_args), **func_kwargs)

    def force_tuple(x):
        if isinstance(x, tuple):
            return x
        return (x,)
    refs = force_tuple(refs)
    o = force_tuple(o)
    assert len(o) == len(refs)
    for i, ref in enumerate(refs):
        res = o[i].d
        assert np.allclose(ref, res, atol=atol_f)

    # Checking function name
    if func_name is not None:
        assert o[0].parent.name == func_name

    # Checking backward
    if True not in backward:
        return

    # NNabla backward
    for v in vinputs:
        if v is None:
            continue
        if len(v.shape) == 0:
            v.g = rng.randn()
            continue
        v.g = rng.randn(*v.shape).astype(v.data.dtype)

    # Verify grad
    vinputs = create_variables(inputs, backward)
    rinputs = copy.deepcopy(inputs)
    rinputs = [rinput if test else None
               for rinput, test in zip(rinputs, backward)]
    vgrads = [rng.randn(*o_.shape) for o_ in o]
    agrads, ngrads = compute_analytical_and_numerical_grad(
        o[0].parent, vinputs, o, rinputs, vgrads, epsilon=dstep, rng=rng)
    if ref_grad is not None:
        rinputs = copy.deepcopy(inputs)
        doutputs = [o_.g for o_ in o]
        ngrads = ref_grad(*(rinputs + doutputs + func_args), **func_kwargs)
    assert np.allclose(ngrads, agrads, atol=atol_b)

    # Check if need_grad works
    for v, b in zip(vinputs, backward):
        if not b or v is None:
            continue
        v.g = 0
        v.need_grad = False
        try:
            o[0].parent.backward(
                list(filter(lambda x: x is not None, vinputs)), o)
        except RuntimeError as e:
            continue  # TODO
        assert np.all(v.g == 0)

    # test accum=False
    for i in range(len(vinputs)):
        if vinputs[i] is None:
            continue
        v = vinputs[i]
        v.need_grad = backward[i]
    for i in range(len(vinputs)):
        if vinputs[i] is None:
            continue
        v = vinputs[i]
        if not backward[i]:
            continue
        f = o[0].parent

        # If input's grad is inplaced, the test doesn't work correctly.
        if f.inplace_grad(i):
            continue

        # Prepare function inputs
        finputs = list(filter(lambda x: x is not None, vinputs))

        # Save accum gradient result
        g = np.random.randn(*v.shape)
        v.g = g
        f.forward(finputs, o)
        f.backward(finputs, o)
        true_g = v.g - g

        # Check accum=False
        accum = [j != i for j in range(len(finputs))]
        v.g = np.random.randn(*v.shape)
        f.forward(finputs, o)
        f.backward(finputs, o, accum)
        assert np.allclose(v.g, true_g, atol=1e-6)

        # Check accum=False with NaN gradient
        v.g = np.float32('nan')
        f.forward(finputs, o)
        f.backward(finputs, o, accum)
        assert not np.any(np.isnan(v.g))

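# Illustrative call site for function_tester (an added sketch; `ref_add2`
# and the 'Add2' function name are assumptions for this example, not code
# taken from the suite above):
def ref_add2(x0, x1):
    # NumPy reference matching F.add2.
    return x0 + x1

def test_add2_forward_backward():
    rng = np.random.RandomState(313)
    inputs = [rng.randn(2, 3).astype(np.float32),
              rng.randn(2, 3).astype(np.float32)]
    # Checks forward values against ref_add2 and compares analytical vs.
    # numerical gradients for both inputs.
    function_tester(rng, F.add2, ref_add2, inputs,
                    ctx=nn.Context(), func_name='Add2')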