def inplace_function_test_helper(inputs, func, func_args=[], func_kwargs={},
                                 ctx=None, rng=None):
    if rng is None:
        rng = np.random.RandomState(313)
    if ctx is None:
        ctx = nn.Context()
    with nn.context_scope(ctx):
        a_s = [inp * 1.0 for inp in inputs]
        y = func(*(a_s + list(func_args)), inplace=False, **func_kwargs)
        l = F.sum(y)
        a_s_i = [inp * 1.0 for inp in inputs]
        y_i = func(*(a_s_i + list(func_args)), inplace=True, **func_kwargs)
        l_i = F.sum(y_i)
    data = [(rng.randn(*inp.shape), rng.randn(*inp.shape)) for inp in inputs]
    for i in range(len(data)):
        inputs[i].d = data[i][0]
        inputs[i].g = data[i][1]
    l.forward()
    l.backward()
    grads = [inp.g.copy() for inp in inputs]
    for i in range(len(data)):
        inputs[i].d = data[i][0]
        inputs[i].g = data[i][1]
    l_i.forward()
    l_i.backward()
    grads_i = [inp.g.copy() for inp in inputs]
    for g, g_i in zip(grads, grads_i):
        assert np.allclose(g, g_i), str(ArrayDiffStats(g, g_i))
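# A minimal usage sketch for the helper above (hypothetical test body;
# `F.leaky_relu` is used only as an example of a function that exposes an
# `inplace` flag):
#
#   rng = np.random.RandomState(313)
#   x = nn.Variable([2, 3, 4], need_grad=True)
#   inplace_function_test_helper([x], F.leaky_relu, ctx=nn.Context(), rng=rng)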
def list_context(func_name):
    try:
        import list_context_ext
        return list_context_ext.list(func_name)
    except Exception as e:
        print(e)
        return [(nn.Context(), func_name)]
def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 8)
            z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 8)
                h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    L.forward(clear_no_need_grad=True)

    # Backprop
    # Diffs should be initialized since they are always accumulated
    x.grad.zero()
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)
    parameters = nn.get_parameters()
    for param in parameters.values():
        param.grad.zero()
    inputs = [x] + list(parameters.values())
    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert_allclose(ngrad, agrad, atol=1.05e-2)
def context(type_config='float', **kw):
    """CPU Context."""
    backends = ['cpu:float']
    if type_config == 'half':
        backends = ['cpu:half', 'cpu:float']
    elif type_config == 'float':
        pass
    else:
        raise ValueError("Unknown data type config is given %s" % type_config)
    return nn.Context(backends, array_classes()[0], '')
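# Usage sketch for the extension context above (assuming this module is
# importable as `nnabla_ext.cpu`):
#
#   ctx = context()                    # backends == ['cpu:float']
#   ctx = context(type_config='half')  # backends == ['cpu:half', 'cpu:float']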
def test_cuda_large_blocks(m):
    CUDA_THREAD_PER_BLOCK = 512
    CUDA_MAX_BLOCKS = 65536
    size = CUDA_MAX_BLOCKS * CUDA_THREAD_PER_BLOCK * m + 3
    print("Variable size:", size)
    x = np.zeros((size,), np.float32)
    v = nn.Variable(x.shape)
    v.d = x
    ctx = nn.Context(backend='cuda')
    y = F.relu(v)
def test_function_context(seed):
    rng = np.random.RandomState(313)
    xd = rng.randn(2, 3)
    x = nn.Variable.from_numpy_array(xd)
    ctx1 = nn.Context(backend=['cpu:float'],
                      array_class='CpuCachedArray', device_id='1')
    with nn.context_scope(ctx1):
        y = F.relu(x)
    ctx0 = nn.Context(backend=['cpu:float'],
                      array_class='CpuCachedArray', device_id='0')

    # TODO: use id or hash if we determine the spec
    assert str(ctx0) != str(ctx1)
    assert str(ctx1) == str(y.parent.context)
    with nn.context_scope(y.parent.context):
        z = F.relu(x)
    assert str(y.parent.context) == str(z.parent.context)
def test_graph_logreg(seed):
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4], need_grad=True)
    w1 = nn.Variable([12, 5], need_grad=True)
    w2 = nn.Variable([12, 5], need_grad=True)
    b1 = nn.Variable([5], need_grad=True)
    b2 = nn.Variable([5], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    w1.d = rng.randn(*w1.shape)
    w2.d = rng.randn(*w2.shape)
    b1.d = rng.randn(*b1.shape)
    b2.d = rng.randn(*b2.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    z1 = F.affine(x, w1, b1, 1)
    z2 = F.affine(x, w2, b2, 1)
    l1 = F.softmax_cross_entropy(z1, t, 1)
    L1 = F.mean(l1)
    l2 = F.softmax_cross_entropy(z2, t, 1)
    L2 = F.mean(l2)
    nn.forward_all([L1, L2])

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads

    # Backprop for z1
    # Diffs should be initialized since they are always accumulated
    x.g = 0
    w1.g = 0
    b1.g = 0
    L1.backward(clear_buffer=True)
    inputs = [x, w1, b1]
    agrad, ngrad = grads(L1, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1e-2)

    # Backprop for z2
    # Diffs should be initialized since they are always accumulated
    x.g = 0
    w2.g = 0
    b2.g = 0
    L2.backward(clear_buffer=True)
    inputs = [x, w2, b2]
    agrad, ngrad = grads(L2, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1e-2)
def solver_tester(rng, solver, ref_solver, solver_args=[], solver_kwargs={},
                  num_itr=5, decay=1e-4, atol=1e-6,
                  ctx=None, solver_name=None):
    if ctx is None:
        ctx = nn.Context()

    # Create params
    p1 = nn.Variable([2, 3, 4])
    p2 = nn.Variable([3, 4, 1, 2])
    p3 = nn.Variable([])
    params = OrderedDict([('zZzZ', p1), ('bbb', p2), ('asdfadfdasd', p3)])
    for p in params.values():
        p.d = rng.randn(*p.shape)
        p.g = rng.randn(*p.shape)

    with nn.context_scope(ctx):
        s = solver(*solver_args, **solver_kwargs)
    s.set_parameters(params)
    if solver_name is not None:
        assert s.name == solver_name

    ref_s = ref_solver(*solver_args, **solver_kwargs)
    ref_s.set_parameters(params)

    # Check weight decay.
    grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)])
    s.weight_decay(decay)
    ref_s.weight_decay(grad_copy, decay)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert np.allclose(ref_p, p.g, atol=atol)

    # Check solver update.
    for i in range(num_itr):
        grads = OrderedDict([(k, rng.randn(*p.shape))
                             for k, p in iteritems(params)])
        for k, g in iteritems(grads):
            params[k].g = g
        s.update()
        ref_s.update(grads)
        for p, ref_p in zip(params.values(), ref_s.params.values()):
            assert np.allclose(ref_p, p.d, atol=atol)

    # Check if remove_state_impl works correctly.
    s.clear_parameters()
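# Usage sketch (hypothetical; `RefSgd` stands for a NumPy reference
# implementation that the solver test module would define):
#
#   import nnabla.solvers as S
#   rng = np.random.RandomState(313)
#   solver_tester(rng, S.Sgd, RefSgd, solver_args=[1e-2], solver_name='Sgd')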
def visualize(args):
    """
    Visualizing embedded digits onto 2D space.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    batch_size = 500

    # Create default context.
    ctx = nn.Context(backend="cpu|cuda",
                     compute_backend="default|cudnn",
                     array_class="CudaArray",
                     device_id="{}".format(args.device_id))

    # Load parameters
    nn.load_parameters(
        os.path.join(args.model_save_path, 'params_%06d.h5' % args.max_iter))

    # Create embedder network
    image = nn.Variable([batch_size, 1, 28, 28])
    feature = mnist_lenet_feature(image, test=False)

    # Process all images
    features = []
    labels = []

    # Prepare MNIST data iterator
    rng = np.random.RandomState(313)
    data = data_iterator_mnist(batch_size, train=False, shuffle=True, rng=rng)
    for i in range(10000 // batch_size):
        image_data, label_data = data.next()
        image.d = image_data / 255.
        feature.forward(clear_buffer=True)
        features.append(feature.d.copy())
        labels.append(label_data.copy())
    features = np.vstack(features)
    labels = np.vstack(labels)

    # Visualize
    f = plt.figure(figsize=(16, 9))
    for i in range(10):
        c = plt.cm.Set1(i / 10.)
        plt.plot(features[labels.flat == i, 0].flatten(),
                 features[labels.flat == i, 1].flatten(), '.', c=c)
    plt.legend(list(map(str, range(10))))
    plt.grid()
    plt.savefig(os.path.join(args.monitor_path, "embed.png"))
def test_graph_clear_buffer(seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
        z4 = PF.affine(z2, 5)
    l1 = F.softmax_cross_entropy(z3, t, 1)
    L1 = F.mean(l1)
    l2 = F.softmax_cross_entropy(z4, t, 1)
    L2 = F.mean(l2)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            nn.forward_all([L1, L2], clear_no_need_grad=cnng)

            # for now, the first backward cannot be
            # called with clear_buffer=True
            L1.backward(clear_buffer=False)
            L2.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()
                import platform
                if platform.machine() == 'ppc64le':
                    pytest.skip("This test fails on ppc64le")
                assert np.all(g == g2)
def _context(proto):
    comm = current_communicator()
    if not proto.backends:
        logger.warn('Old-style context. Updating to new format.')
        # Update from old Context
        backends = [x.strip() for x in proto.backend.split('|')]
        compute_backends = [x.strip()
                            for x in proto.compute_backend.split('|')]
        if 'cuda' in backends:
            device_id = str(proto.device_id)
            if comm:
                device_id = str(comm.local_rank)
            if 'cudnn' in compute_backends:
                try:
                    import nnabla_ext.cudnn
                    ctx = nnabla_ext.cudnn.context(device_id=device_id)
                except ImportError:
                    logger.warn('Fallback to CPU context.')
                    import nnabla_ext.cpu
                    ctx = nnabla_ext.cpu.context()
            elif 'default' in compute_backends:
                try:
                    import nnabla_ext.cuda
                    ctx = nnabla_ext.cuda.context(device_id=device_id)
                except ImportError:
                    logger.warn('Fallback to CPU context.')
                    import nnabla_ext.cpu
                    ctx = nnabla_ext.cpu.context()
            else:
                raise ValueError('Invalid compute_backend {}'.format(
                    proto.compute_backend))
        elif 'cpu' in backends:
            import nnabla_ext.cpu
            ctx = nnabla_ext.cpu.context()
        else:
            raise ValueError('Invalid context {}'.format(proto))
        ctx.array_class = str(proto.array_class)
        return ctx
    ctx = nn.Context()
    ctx.backend = proto.backends
    ctx.array_class = str(proto.array_class)
    if comm:
        ctx.device_id = str(comm.local_rank)
    else:
        ctx.device_id = str(proto.device_id)
    return ctx
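# Behavior sketch for the old-style upgrade path above (hypothetical proto
# values, for illustration only):
#
#   proto.backend = 'cpu|cuda'; proto.compute_backend = 'default|cudnn'
#   -> tries nnabla_ext.cudnn.context(device_id=...), falling back to
#      nnabla_ext.cpu.context() when the CUDA extension cannot be imported.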
def test_reshape():
    v = nn.Variable([2, 3, 4], need_grad=True)
    grad = np.random.randn(*v.shape).astype(np.float32)
    v.g = grad
    v.d = np.random.randn(*v.shape)
    import nnabla.functions as F
    with nn.context_scope(nn.Context()), nn.auto_forward():
        v2 = F.identity(v)
        v2_s = v2.reshape((3, 4, 2))
        v3 = F.identity(v2_s)
    v3.backward(clear_buffer=False)
    assert np.all(v2_s.g.flat == v2.g.flat)
    assert np.all(v2_s.g == 1)
    v2.d = 1
    assert np.all(v2_s.d == 1)
    v2.g = 1.5
    assert np.all(v2_s.g == 1.5)
def test_unlinked():
    v = nn.Variable([2, 3, 4], need_grad=True)
    grad = np.random.randn(*v.shape).astype(np.float32)
    v.g = grad
    v.d = np.random.randn(*v.shape)
    import nnabla.functions as F
    with nn.context_scope(nn.Context()), nn.auto_forward():
        v2 = F.identity(v)
        v2_u = v2.unlinked()
        v3 = F.identity(v2_u)
    v2_u.grad.zero()
    v2_g = v2_u.g.copy()
    v3.backward(clear_buffer=False)
    assert type(v2_u) == type(v2)
    assert np.all(v.g == grad)
    assert np.all(v2_u.g == v2.g)
    assert np.all(v2_u.g == v2_g + 1)
def list(func_name):
    sys.path.append(
        os.path.join(os.path.dirname(__file__),
                     '..', '..', 'build-tools', 'code_generator'))
    from load_implements_rst import Implements
    l = [(nn.Context(), func_name)]
    info = Implements().info
    if func_name in info:
        if 'cuda' in info[func_name]:
            import nnabla_ext.cuda
            l.append((nnabla_ext.cuda.context(), func_name + 'Cuda'))
        if 'cudnn' in info[func_name]:
            import nnabla_ext.cuda.cudnn
            l.append(
                (nnabla_ext.cuda.cudnn.context(), func_name + 'CudaCudnn'))
    return l
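# Usage sketch (hypothetical call; each entry pairs an available context with
# the function class name expected in that context):
#
#   for ctx, name in list('Affine'):
#       print(ctx, name)  # e.g. (<cpu Context>, 'Affine'),
#                         #      (<cuda Context>, 'AffineCuda'), ...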
def ref_grad_spectral_norm(w, u, dy, du, dim, itr, eps, test, output_u,
                           need_grad_flags):
    # We need this function for using `function_tester`
    # because the numerical gradient of `w` will not be calculated correctly.
    # The reason is that there are some intermediate variables with
    # `need_grad == False` which are connected to the input `w` in the
    # function composite implementation.
    cpu_context = nn.Context(["cpu:float"])
    with nn.context_scope(cpu_context):
        w = nn.Variable.from_numpy_array(w)
        u = nn.Variable.from_numpy_array(u)
        w.need_grad = True
        w.grad.zero()
        w_sn = PF._spectral_norm_v1(
            w, u_init=u.data.get_data('r'), dim=dim, itr=itr, test=test)
        w_sn.forward(clear_no_need_grad=True)
        w_sn.backward(dy, clear_buffer=True)
    return w.grad.get_data('r').flatten()
def test_graph_clear_buffer(seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            L.forward(clear_no_need_grad=cnng)
            L.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()
                assert np.all(g == g2)
def clear_no_need_grad_tester(rng, func, inputs, func_args=[], func_kwargs={},
                              backward=None, atol_f=1e-6, ctx=None,
                              func_name=None, insert_identity=[],
                              auto_forward=False):
    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True for _ in inputs]
    if True not in backward:
        return

    state_rng = None
    if rng is not None:
        state_rng = rng.get_state()
    else:
        rng = np.random.RandomState(313)

    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinputs += [nn.Variable(i.shape, need_grad=b)]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    vinputs = create_variables(inputs, backward)
    vinputs_clear_buffer = create_variables(inputs, backward)
    vinputs_identity_clear_buffer = []
    if not insert_identity:
        insert_identity = [True] * len(vinputs)

    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        for idx, i in enumerate(vinputs_clear_buffer):
            if i is None:
                vinputs_identity_clear_buffer += [None]
            elif insert_identity[idx]:
                vinputs_identity_clear_buffer += [F.identity(i)]
            else:
                vinputs_identity_clear_buffer += [i]

    # Checking forward(clear_no_need_grad=True)
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        o = func(*(vinputs + func_args), **func_kwargs)
        o = force_tuple(o)
        F.sink(*o).forward(clear_no_need_grad=False)

        o_clear_buffer = func(*(vinputs_identity_clear_buffer + func_args),
                              **func_kwargs)
        o_clear_buffer = force_tuple(o_clear_buffer)
        o_identity_clear_buffer = list(
            map(lambda x: F.identity(x) if x is not None else None,
                o_clear_buffer))
        o_identity_clear_buffer = list(
            filter(lambda x: x is not None, o_identity_clear_buffer))
        F.sink(*o_identity_clear_buffer).forward(clear_no_need_grad=True)

    for i in range(len(o)):
        if o[i] is None:
            continue
        ref = o[i].d
        res = o_identity_clear_buffer[i].d
        assert_allclose(
            ref, res, atol=atol_f,
            err_msg="{} forward(clear_no_need_grad=True) test fails".format(
                func_name))

    vinputs = list(filter(lambda x: x is not None, vinputs))
    vinputs_clear_buffer = list(
        filter(lambda x: x is not None, vinputs_clear_buffer))

    for i in range(len(vinputs)):
        vinputs[i].grad.zero()
        vinputs_clear_buffer[i].grad.zero()

    for i in range(len(o)):
        if o[i] is None:
            continue
        o[i].g = randn(rng, *o[i].shape)
        o_identity_clear_buffer[i].g = o[i].g

    F.sink(*o).backward()
    F.sink(*o_identity_clear_buffer).backward(clear_buffer=True)

    for i in range(len(vinputs)):
        ref = vinputs[i].g
        res = vinputs_clear_buffer[i].g
        assert_allclose(
            ref, res, atol=atol_f,
            err_msg="{} forward(clear_no_need_grad=True) and backward "
                    "test fails".format(func_name))

    if state_rng:
        rng.set_state(state_rng)
def backward_function_tester(rng, func, ref_func, inputs,
                             func_args=[], func_kwargs={},
                             atol_f=1e-6, atol_b=1e-3, atol_accum=1e-3,
                             dstep=1e-3, backward=None, ctx=None,
                             func_name=None, ref_grad=None,
                             disable_half_test=False, atol_half=1e-1):
    """Backward function tester

    In the forward test, it compares the results of nn.grad and
    `func`.backward. In the backward test, it compares the analytical
    gradients and numerical gradients with `grad_outputs`.
    """
    # TODO: half
    from scipy.optimize import approx_fprime

    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True if i is not None else False for i in inputs]

    # TODO: Remove set_default_context after adding ctx to BackwardFunction.
    nn.set_default_context(ctx)

    # Create Variables
    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinputs += [nn.Variable(i.shape, need_grad=b)]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    # Create grad_outputs
    def create_grad_outputs(outputs):
        grad_outputs = []
        for o in outputs:
            if o.shape == ():
                go = nn.NdArray.from_numpy_array(np.array(randn(rng)))
                # go = nn.NdArray.from_numpy_array(np.array(1.0))
            else:
                go = nn.NdArray.from_numpy_array(randn(rng, *o.shape))
                # go = nn.NdArray.from_numpy_array(np.ones(o.shape))
            grad_outputs.append(go)
        return grad_outputs

    # Fill grads
    def fill_grads(vinputs, grads):
        for vi, gd in zip(vinputs, grads):
            if vi is None:
                continue
            vi.g = gd

    # Zero grads
    def zero_grads(vinputs):
        for vi in vinputs:
            if vi is None:
                continue
            vi.grad.zero()
        return

    # Gradient penalty on grads
    def gradient_penalty2(grads):
        gp2 = 0.0
        for g in grads:
            gp2 += F.sum(g ** 2.0)
        return gp2

    # Product sum
    def prod_sum(inputs0, inputs1):
        out = 0.0
        for inp0, inp1 in zip(inputs0, inputs1):
            out += inp0 * nn.Variable(inp1.shape).apply(data=inp1)
        return out

    # Set inputs for the numerical gradients
    def set_inputs(inputs0, vinputs):
        begin = 0
        for i in vinputs:
            end = begin + i.size
            if i.need_grad == True:
                i.d = inputs0[begin:end].reshape(i.shape)
            begin = end

    # Gradient penalty on grads used for computing numerical gradients
    def obj_func(inputs0, gp2, vinputs):
        set_inputs(inputs0, vinputs)
        gp2.forward()
        return gp2.d.copy()

    # # Half test
    # if not disable_half_test:
    #     finputs = create_variables(inputs, backward)
    #     hinputs = create_variables(inputs, backward)
    #     half_test(rng, func, finputs, hinputs, func_args,
    #               func_kwargs, backward, ctx, func_name, atol=atol_half)

    # Create input variables
    vinputs = create_variables(inputs, backward)

    # --- Forward test --- #
    # Zero grads
    zero_grads(vinputs)
    # Forward/Backward on the forward graph
    voutputs = [F.sigmoid(x) for x in
                force_list(func(*(vinputs + func_args), **func_kwargs))]
    agrad_outputs = create_grad_outputs(voutputs)
    o = prod_sum(voutputs, agrad_outputs)
    o.forward()
    o.backward()  # clear_buffer=True)
    # Grads
    vinputs = list(filter(lambda vi: vi is not None, vinputs))
    grads = nn.grad(voutputs, vinputs, agrad_outputs)
    grads = list(filter(lambda x: x is not None, grads))
    o = F.sink(*grads)
    o.forward()
    # Check forward
    for vi, go in zip(vinputs, grads):
        if vi.need_grad is False:
            continue
        fgrads = vi.g
        bgrads = go.d
        assert_allclose(fgrads, bgrads, atol=atol_f)

    # TODO: 1. Pass function arguments directly to backward functions.
    # TODO: 2. Should be changed to the simpler form by simply testing
    #          BackwardFunction.

    # --- Backward (accum = False) test --- #
    # Zero grads
    zero_grads(vinputs)
    # Compute analytical grads
    gp2 = gradient_penalty2(grads)
    gp2.forward()
    gp2.backward(clear_buffer=True)
    analytical_grads = np.concatenate(
        [vi.g.copy().flatten() for vi in vinputs])
    analytical_grads0 = analytical_grads
    # Compute numerical grads
    inputs0 = np.concatenate(
        [inp.flatten() for inp in inputs if inp is not None])
    numerical_grads = approx_fprime(inputs0, obj_func, dstep, gp2, vinputs)
    # Check backward
    assert_allclose(analytical_grads, numerical_grads, atol=atol_b)

    # --- Backward (accum = True) test --- #
    # Random grads
    rand_grads = [randn(rng, *vi.shape) for vi in vinputs]
    fill_grads(vinputs, rand_grads)
    # Compute analytical grads
    gp2.forward()
    gp2.backward(clear_buffer=True)
    analytical_grads = np.concatenate(
        [vi.g.copy().flatten() for vi in vinputs])
    rand_grads = np.concatenate(
        [rg.flatten() if isinstance(rg, np.ndarray)
         else np.array(rg).reshape((1,)) for rg in rand_grads])
    analytical_grads -= rand_grads
    # Check backward
    assert_allclose(analytical_grads, analytical_grads0, atol=atol_accum)
def function_tester(rng, func, ref_func, inputs,
                    func_args=[], func_kwargs={},
                    atol_f=1e-6, atol_b=1e-3, atol_accum=1e-6, dstep=1e-3,
                    backward=None, ctx=None, func_name=None, ref_grad=None,
                    disable_half_test=False, atol_half=1e-1):
    """
    Automatic testing of the forward/backward pass of `func` by comparing it
    to the reference implementation in `ref_func`.

    Syntax of `ref_func`: inputs, parameters
    Syntax of `ref_grad`: inputs, output grads, parameters
    """
    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True for _ in inputs]

    # Create Variables
    # print('create_variable')
    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinputs += [nn.Variable(i.shape, need_grad=b)]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    # Half test
    if not disable_half_test:
        finputs = create_variables(inputs, backward)
        hinputs = create_variables(inputs, backward)
        half_test(rng, func, finputs, hinputs, func_args,
                  func_kwargs, backward, ctx, func_name, atol=atol_half)

    vinputs = create_variables(inputs, backward)

    # Checking forward
    # print('checking forward')
    with nn.context_scope(ctx), nn.auto_forward():
        o = func(*(vinputs + func_args), **func_kwargs)
    rinputs = copy.deepcopy(inputs)  # inputs for ref_func
    refs = ref_func(*(rinputs + func_args), **func_kwargs)
    refs = force_tuple(refs)
    o = force_tuple(o)
    assert len(o) == len(refs)
    for i, ref in enumerate(refs):
        res = o[i].d
        assert np.allclose(ref, res, atol=atol_f), \
            str(ArrayDiffStats(ref, res))

    # Checking function name
    try:
        import function_test_callback
        result = create_function_nnp(
            vinputs, o, func_name, func_args, func_kwargs)
        if result is not None:
            function_test_callback.callback(func_name, *result)
    except UnboundLocalError:
        pass
    except IndexError:
        pass
    except ImportError:
        pass
    # print('checking function name')
    if func_name is not None:
        assert o[0].parent.name == func_name

    # Checking backward
    # print('checking backward')
    if True not in backward:
        return

    # NNabla backward
    for v in vinputs:
        if v is None:
            continue
        if len(v.shape) == 0:
            v.g = rng.randn()
            continue
        v.g = rng.randn(*v.shape).astype(v.data.dtype)

    # Verify grad
    vinputs = create_variables(inputs, backward)
    rinputs = copy.deepcopy(inputs)
    rinputs = [rinput if test else None
               for rinput, test in zip(rinputs, backward)]
    vgrads = [rng.randn(*o_.shape) for o_ in o]
    agrads, ngrads = compute_analytical_and_numerical_grad(
        o[0].parent, vinputs, o, rinputs, vgrads,
        epsilon=dstep, rng=rng, ref_grad=ref_grad)
    if ref_grad is not None:
        rinputs = copy.deepcopy(inputs)
        doutputs = [o_.g for o_ in o]
        ngrads = ref_grad(*(rinputs + doutputs + func_args), **func_kwargs)
    assert np.allclose(ngrads, agrads, atol=atol_b), \
        str(ArrayDiffStats(ngrads, agrads))

    # Check if need_grad works
    for v, b in zip(vinputs, backward):
        if not b or v is None:
            continue
        v.g = 0
        v.need_grad = False
        try:
            o[0].parent.backward(
                list(filter(lambda x: x is not None, vinputs)), o)
        except RuntimeError as e:
            continue  # TODO
        assert np.all(v.g == 0)

    # test accum=False
    for i in range(len(vinputs)):
        if vinputs[i] is None:
            continue
        v = vinputs[i]
        v.need_grad = backward[i]
    for i in range(len(vinputs)):
        if vinputs[i] is None:
            continue
        v = vinputs[i]
        if not backward[i]:
            continue
        f = o[0].parent

        # If the input's grad is inplaced, the test doesn't work correctly.
        if f.inplace_grad(i):
            continue

        # Prepare function inputs
        finputs = list(filter(lambda x: x is not None, vinputs))

        # Save accum gradient result
        g = rng.randn(*v.shape)
        v.g = g
        f.forward(finputs, o)
        f.backward(finputs, o)
        true_g = v.g - g

        # Check accum=False
        accum = [j != i for j, vv in enumerate(vinputs) if vv is not None]
        v.g = rng.randn(*v.shape)
        f.forward(finputs, o)
        f.backward(finputs, o, accum)
        assert np.allclose(v.g, true_g, atol=atol_accum), \
            str(ArrayDiffStats(v.g, true_g))

        # Check accum=False with NaN gradient
        v.g = np.float32('nan')
        f.forward(finputs, o)
        f.backward(finputs, o, accum)
        assert not np.any(np.isnan(v.g))
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import numpy as np
import nnabla as nn
import nnabla.functions as F
from nbla_test_utils import list_context

ctxs = list_context('PReLU')
if hasattr(nn.extensions, 'cuda'):
    ctxs += [(nn.Context(backend='cuda'), 'PReLUCuda')]


def ref_prelu(x, w, base_axis=1):
    wshape = [1 for _ in range(x.ndim)]
    if w.size != 1:
        wshape[base_axis] = w.size
    return np.maximum(0, x) + w.reshape(wshape) * np.minimum(0, x)


@pytest.mark.parametrize("seed", [313])
@pytest.mark.parametrize("inshape, wshape, base_axis",
                         [((2, 3, 2, 3, 2), tuple(), 4),
                          ((2, 3, 1, 3), (3,), 1)])
@pytest.mark.parametrize("ctx, func_name", ctxs)
def test_prelu_forward_backward(seed, inshape, wshape, base_axis, ctx,
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import numpy as np
import nnabla as nn
import nnabla.functions as F

ctxs = [(nn.Context(), 'ConfusionMatrix')]
if hasattr(nn.extensions, 'cuda'):
    ctxs += [(nn.extensions.cuda.context(), 'ConfusionMatrixCuda')]


def ref_confusion_matrix(x, l, axis):
    orig_x = x.copy()
    x = np.rollaxis(x, axis, x.ndim).reshape(-1, x.shape[axis])
    ll = np.rollaxis(l, axis, x.ndim).flatten()
    # Note: `int` is used instead of the removed NumPy alias `np.int`.
    y = np.zeros((orig_x.shape[axis], orig_x.shape[axis]), int)
    for x_, ll_ in zip(x, ll):
        index = -1
        for i, x__ in enumerate(x_):
            if x__ >= x_[index]:
                index = i
        y[ll_][index] += 1
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import numpy as np
import nnabla as nn
import nnabla.functions as F
from nbla_test_utils import list_context
from nnabla.testing import assert_allclose

ctxs = list_context('FusedConvolution')
cpu_context = nn.Context(["cpu:float"])


class RefFusedConvolutionGraph(object):

    def __init__(self, x, weight, bias, beta, gamma, rmean, rvar, z,
                 base_axis, pad, stride, dilation, group, channel_last,
                 decay_rate, eps, batch_stat, nonlinearity,
                 nonlinearity_args):
        from collections import OrderedDict
        inputs = OrderedDict()
        xvar = nn.Variable.from_numpy_array(x)
        weightvar = nn.Variable.from_numpy_array(weight)
        inputs['x'] = xvar
        inputs['weight'] = weightvar
        biasvar = None
        betavar = None
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import numpy as np
import nnabla as nn
import nnabla.functions as F

ctxs = [(nn.Context(), 'BinaryError')]
if hasattr(nn.extensions, 'cuda'):
    ctxs += [(nn.extensions.cuda.context(), 'BinaryErrorCuda')]


def ref_binary_error(x, l):
    y = []
    for x_, l_ in zip(x, l):
        y.append((x_ >= 0.5) != (l_ >= 0.5))
    return np.array(y).reshape(x.shape)


@pytest.mark.parametrize("ctx, func_name", ctxs)
@pytest.mark.parametrize("seed", [313])
def test_binary_error_forward(seed, ctx, func_name):
    ishape = [5, 6, 7]
def pack_padded_sequence(padded_sequence, lengths, batch_first=False,
                         enforce_sorted=True):
    r"""Pack padded variable-length sequences.

    This method packs padded variable-length sequences.

    :math:`T` is the max length over the lengths of sequences.
    :math:`B` is the batch size equal to the number of sequences.
    :math:`*` is the remaining dimensions including none.

    .. note::
      This function **must** be used in the dynamic computation mode.

    Example:

    .. code-block:: python

      import numpy as np
      import nnabla as nn
      import nnabla.functions as F
      import nnabla.utils.rnn as rnn_utils

      nn.set_auto_forward(True)

      l2v = lambda ldata: nn.Variable.from_numpy_array(np.asarray(ldata))
      a = l2v([1, 1, 1, 1])
      b = l2v([2, 2, 2])
      c = l2v([2, 2, 2])
      d = l2v([3, 3])
      e = l2v([3, 3])
      sequences = [a, b, c, d, e]
      lengths = l2v([seq.shape[0] for seq in sequences])

      padded_sequence = rnn_utils.pad_sequence(sequences)
      print(padded_sequence.d)

      packed_sequence = rnn_utils.pack_padded_sequence(padded_sequence, lengths)
      print(packed_sequence.data.d)
      print(packed_sequence.batch_sizes.d)

    Args:
      padded_sequence (:obj:`nnabla.Variable`): Padded sequence of
        (:math:`T \times B \times *`) or (:math:`B \times T \times *`) shape.
      lengths (:obj:`nnabla.Variable`): Sequence length for each batch;
        always resides in CPU.
      batch_first (bool): `padded_sequence` is of (:math:`T`, :math:`B`,
        :math:`*`) shape if False, otherwise (:math:`B`, :math:`T`,
        :math:`*`).
      enforce_sorted (bool): Sequences are sorted by the length in a
        decreasing order if True. Default is True.

    Returns:
      :obj:`PackedSequence`
    """
    if enforce_sorted:
        sorted_indices = None
        unsorted_indices = None
    else:
        # TODO: use the cuda context once the sort bug is fixed
        with nn.context_scope(nn.Context()):
            lengths, sorted_indices = F.sort(lengths, axis=0, reverse=True,
                                             with_index=True)
        B = sorted_indices.shape[0]
        unsorted_indices = F.scatter_nd(F.arange(0, B),
                                        sorted_indices.reshape((1, B)),
                                        shape=(B,))
        axis = 0 if batch_first else 1
        padded_sequence = F.gather(padded_sequence, sorted_indices, axis)
    packed_sequence, batch_sizes = F.pack_padded_sequence(
        padded_sequence, lengths, batch_first)
    packed_sequence0 = PackedSequence()
    packed_sequence0.data = packed_sequence
    packed_sequence0.batch_sizes = batch_sizes
    packed_sequence0.sorted_indices = sorted_indices
    packed_sequence0.unsorted_indices = unsorted_indices
    return packed_sequence0
def solver_tester(rng, solver, ref_solver, solver_args=[], solver_kwargs={},
                  num_itr=5, decay=1e-4, clip_norm=0.5, atol=1e-6,
                  ctx=None, solver_name=None):
    if ctx is None:
        ctx = nn.Context()

    # Create params
    p1 = nn.Variable([2, 3, 4])
    p2 = nn.Variable([3, 4, 1, 2])
    p3 = nn.Variable([])
    params = OrderedDict([('zZzZ', p1), ('bbb', p2), ('asdfadfdasd', p3)])
    for p in params.values():
        p.d = rng.randn(*p.shape)
        p.g = rng.randn(*p.shape)

    with nn.context_scope(ctx):
        s = solver(*solver_args, **solver_kwargs)
    s.set_parameters(params)
    if solver_name is not None:
        assert s.name == solver_name

    ref_s = ref_solver(*solver_args, **solver_kwargs)
    ref_s.set_parameters(params)

    # Get params (unordered_map is used in C++, thus check in both directions)
    params_ = s.get_parameters()
    for k0, v0 in iteritems(ref_s.params):
        v1 = params_[k0]
        assert_allclose(v0, v1.d, atol=atol)
    for k1, v1 in iteritems(params_):
        v0 = ref_s.params[k1]
        assert_allclose(v0, v1.d, atol=atol)

    # Check weight decay.
    grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)])
    s.weight_decay(decay)
    ref_s.weight_decay(grad_copy, decay)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert_allclose(ref_p, p.g, atol=atol)

    # Check clip grad by norm.
    grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)])
    s.clip_grad_by_norm(clip_norm)
    ref_s.clip_grad_by_norm(grad_copy, clip_norm)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert np.allclose(ref_p, p.g, atol=atol)

    # Check solver update.
    for i in range(num_itr):
        grads = OrderedDict([(k, rng.randn(*p.shape))
                             for k, p in iteritems(params)])
        for k, g in iteritems(grads):
            params[k].g = g
        s.update()
        ref_s.update(grads)
        # update check
        for p, ref_p in zip(params.values(), ref_s.params.values()):
            assert_allclose(ref_p, p.d, atol=atol)
        # iteration state increment check
        for state in s.get_states().values():
            assert state.t == (i + 1)

    # Check inf, nan, and inf/nan
    for v, method in zip([[np.inf], [np.nan], [np.inf, np.nan]],
                         [lambda s: s.check_inf_grad(),
                          lambda s: s.check_nan_grad(),
                          lambda s: s.check_inf_or_nan_grad()]):
        def set_value(p):
            p.g[...] = rng.choice(v + [-1, 0, 1],
                                  size=int(np.prod(p.shape)),
                                  replace=True).reshape(p.shape)
            if v[0] not in p.g:
                p.g.flat[rng.choice(np.arange(int(np.prod(p.shape))))] = v[0]

        for p in params.values():
            assert method(s) == False
            g = p.g.copy()
            set_value(p)
            assert method(s) == True
            p.g[...] = g

    # Rescale grad
    scale = 10.
    ref_grad = [p.g.copy() for p in params.values()]
    for p in params.values():
        p.g *= scale
    s.scale_grad(1. / scale)
    for ref, p in zip(ref_grad, params.values()):
        assert_allclose(ref, p.g, atol=1e-4)

    # Save/Load Test
    def test_save_load(s, name):
        # Save states
        import tempfile
        tmpdir = tempfile.mkdtemp("solver-test")
        tmpfile = os.path.join(tmpdir, name)
        states0 = s.get_states()
        s.save_states(tmpfile)

        # Load states
        with nn.context_scope(ctx):
            s1 = solver(*solver_args, **solver_kwargs)
        s1.set_parameters(params)
        s1.load_states(tmpfile)

        # Check save/load states
        states1 = s1.get_states()
        for k0, s0 in iteritems(states0):
            s1 = states1[k0]
            for sname, vx0 in iteritems(s0.pstate):
                vx1 = s1.pstate[sname]
                assert_allclose(vx0.d, vx1.d)
            assert s1.t == s0.t

    test_save_load(s, "states.h5")
    test_save_load(s, "states.protobuf")

    # Check if remove_state_impl works correctly.
    s.clear_parameters()
def solver_tester(rng, solver, ref_solver, solver_args=[], solver_kwargs={},
                  num_itr=5, decay=1e-4, atol=1e-6,
                  ctx=None, solver_name=None):
    if ctx is None:
        ctx = nn.Context()

    # Create params
    p1 = nn.Variable([2, 3, 4])
    p2 = nn.Variable([3, 4, 1, 2])
    p3 = nn.Variable([])
    params = OrderedDict([('zZzZ', p1), ('bbb', p2), ('asdfadfdasd', p3)])
    for p in params.values():
        p.d = rng.randn(*p.shape)
        p.g = rng.randn(*p.shape)

    with nn.context_scope(ctx):
        s = solver(*solver_args, **solver_kwargs)
    s.set_parameters(params)
    if solver_name is not None:
        assert s.name == solver_name

    ref_s = ref_solver(*solver_args, **solver_kwargs)
    ref_s.set_parameters(params)

    # Check weight decay.
    grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)])
    s.weight_decay(decay)
    ref_s.weight_decay(grad_copy, decay)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert np.allclose(ref_p, p.g, atol=atol)

    # Check solver update.
    for i in range(num_itr):
        grads = OrderedDict([(k, rng.randn(*p.shape))
                             for k, p in iteritems(params)])
        for k, g in iteritems(grads):
            params[k].g = g
        s.update()
        ref_s.update(grads)
        for p, ref_p in zip(params.values(), ref_s.params.values()):
            assert np.allclose(ref_p, p.d, atol=atol)

    # Check inf, nan, and inf/nan
    for v, method in zip([[np.inf], [np.nan], [np.inf, np.nan]],
                         [lambda s: s.check_inf_grad(),
                          lambda s: s.check_nan_grad(),
                          lambda s: s.check_inf_or_nan_grad()]):
        def set_value(p):
            p.g[...] = rng.choice(v + [-1, 0, 1],
                                  size=int(np.prod(p.shape)),
                                  replace=True).reshape(p.shape)
            if v[0] not in p.g:
                p.g.flat[rng.choice(np.arange(int(np.prod(p.shape))))] = v[0]

        for p in params.values():
            assert method(s) == False
            g = p.g.copy()
            set_value(p)
            assert method(s) == True
            p.g[...] = g

    # Rescale grad
    scale = 10.
    ref_grad = [p.g.copy() for p in params.values()]
    for p in params.values():
        p.g *= scale
    s.scale_grad(1. / scale)
    for ref, p in zip(ref_grad, params.values()):
        assert np.allclose(ref, p.g, atol=1e-4)

    # Check if remove_state_impl works correctly.
    s.clear_parameters()
def function_tester(rng, func, ref_func, inputs,
                    func_args=[], func_kwargs={},
                    atol_f=1e-6, atol_b=1e-3, atol_accum=1e-6, dstep=1e-3,
                    backward=None, ctx=None, func_name=None, ref_grad=None,
                    disable_half_test=False, atol_half=1e-1,
                    insert_identity=[],
                    disable_clear_no_need_grad_test=False,
                    auto_forward=False):
    """
    Automatic testing of the forward/backward pass of `func` by comparing it
    to the reference implementation in `ref_func`.

    Syntax of `ref_func`: inputs, parameters
    Syntax of `ref_grad`: inputs, output grads, parameters
    """
    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True for _ in inputs]

    # Create Variables
    # print('create_variable')
    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinputs += [nn.Variable(i.shape, need_grad=b)]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    # Half test
    if not disable_half_test:
        finputs = create_variables(inputs, backward)
        hinputs = create_variables(inputs, backward)
        half_test(rng, func, finputs, hinputs, func_args,
                  func_kwargs, backward, ctx, func_name, atol=atol_half)

    vinputs = create_variables(inputs, backward)

    # Checking forward
    # print('checking forward')
    with nn.context_scope(ctx), nn.auto_forward():
        o = func(*(vinputs + func_args), **func_kwargs)
    rinputs = copy.deepcopy(inputs)  # inputs for ref_func
    refs = ref_func(*(rinputs + func_args), **func_kwargs)
    refs = force_tuple(refs)
    o = force_tuple(o)
    assert len(o) == len(refs)
    for i, ref in enumerate(refs):
        res = o[i].d
        assert_allclose(ref, res, atol=atol_f,
                        err_msg="{} forward test fails".format(func_name))

    # Checking recomputation
    vinputs = create_variables(inputs, backward)
    recomputation_test(rng, func, vinputs, func_args, func_kwargs, ctx)

    # Checking forward(clear_no_need_grad=True)
    if not disable_clear_no_need_grad_test:
        clear_no_need_grad_tester(rng, func, inputs, func_args, func_kwargs,
                                  backward, atol_f, ctx, func_name,
                                  insert_identity, auto_forward)

    # Checking function name
    try:
        import function_test_callback
        result = create_function_nnp(
            vinputs, o, func_name, func_args, func_kwargs)
        if result is not None:
            function_test_callback.callback(func_name, *result)
    except UnboundLocalError:
        pass
    except IndexError:
        pass
    except ImportError:
        pass
    # print('checking function name')
    if func_name is not None:
        assert o[0].parent.name == func_name

    # Checking backward
    # print('checking backward')
    if True not in backward:
        return

    # NNabla backward
    for v in vinputs:
        if v is None:
            continue
        if len(v.shape) == 0:
            v.g = randn(rng)
            continue
        v.g = randn(rng, *v.shape)

    # Verify grad
    vinputs = create_variables(inputs, backward)
    rinputs = copy.deepcopy(inputs)
    rinputs = [rinput if test else None
               for rinput, test in zip(rinputs, backward)]
    vgrads = [randn(rng, *o_.shape) for o_ in o]

    def reset_ograds():
        '''
        Reset output grads every time we call backward.
        This is required because the output grads might be inplaced
        and modified during the backward operation.
        '''
        for ovar, g in zip(o, vgrads):
            ovar.g = g

    agrads, ngrads = compute_analytical_and_numerical_grad(
        o[0].parent, vinputs, o, rinputs, vgrads,
        epsilon=dstep, rng=rng, ref_grad=ref_grad)
    if ref_grad is not None:
        rinputs = copy.deepcopy(inputs)
        doutputs = copy.deepcopy(vgrads)
        ngrads = ref_grad(*(rinputs + doutputs + func_args), **func_kwargs,
                          need_grad_flags=backward)
    assert_allclose(
        ngrads, agrads, atol=atol_b,
        err_msg="{} backward w/o accumulation test fails".format(func_name))

    # Check if need_grad works
    for v, b in zip(vinputs, backward):
        if not b or v is None:
            continue
        v.grad.zero()
        v.need_grad = False
        reset_ograds()
        try:
            o[0].parent.forward(
                list(filter(lambda x: x is not None, vinputs)), o)
            o[0].parent.backward(
                list(filter(lambda x: x is not None, vinputs)), o)
        except RuntimeError as e:
            continue  # TODO
        assert np.all(v.g == 0)

    # test accum=False
    for i in range(len(vinputs)):
        if vinputs[i] is None:
            continue
        v = vinputs[i]
        v.need_grad = backward[i]
    for i in range(len(vinputs)):
        if vinputs[i] is None:
            continue
        v = vinputs[i]
        if not backward[i]:
            continue
        f = o[0].parent

        # Prepare function inputs
        finputs = list(filter(lambda x: x is not None, vinputs))

        # Save accum gradient result
        g = randn(rng, *v.shape)
        v.g = g
        reset_ograds()
        f.forward(finputs, o)
        f.backward(finputs, o)
        true_g = v.g - g

        # Check accum=False
        accum = [j != i for j, vv in enumerate(vinputs) if vv is not None]
        v.g = randn(rng, *v.shape)
        reset_ograds()
        f.forward(finputs, o)
        f.backward(finputs, o, accum)
        assert_allclose(
            v.g, true_g, atol=atol_accum,
            err_msg="{} backward w/ accumulation test fails.".format(
                func_name))

        # Check accum=False with NaN gradient
        v.g = np.float32('nan')
        reset_ograds()
        f.forward(finputs, o)
        f.backward(finputs, o, accum)
        assert not np.any(np.isnan(v.g))
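# Usage sketch for `function_tester` (hypothetical test; mirrors the common
# pattern of the nnabla function tests, with `F.relu` as the function under
# test and a NumPy lambda as the reference implementation):
#
#   @pytest.mark.parametrize("ctx, func_name", list_context('ReLU'))
#   @pytest.mark.parametrize("seed", [313])
#   def test_relu_forward_backward(seed, ctx, func_name):
#       rng = np.random.RandomState(seed)
#       inputs = [rng.randn(2, 3, 4).astype(np.float32)]
#       function_tester(rng, F.relu, lambda x: np.maximum(x, 0), inputs,
#                       ctx=ctx, func_name=func_name)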
def context(**kw):
    """CPU Context."""
    return nn.Context('cpu', array_classes()[0], '', 'default')
def backward_function_tester(rng, func, inputs=None,
                             func_args=[], func_kwargs={},
                             atol_f=1e-4, atol_b=1e-3, atol_accum=5e-2,
                             dstep=1e-3, backward=None, backward_b=None,
                             ctx=None, non_accum_check=False,
                             skip_backward_check=False,
                             insert_identity=[], auto_forward=False):
    """
    Automatic testing of the backward function and the backward pass of
    `func` by comparing them. The backward pass of `func` is the reference;
    therefore, the backward pass of `func` must be tested first!

    Syntax of `ref_func`: inputs, parameters
    """
    if ctx is None:
        ctx = nn.Context()
    if backward is None:
        backward = [True for _ in inputs]

    def create_variables(inputs, backward):
        vinputs = []
        for i, b in zip(inputs, backward):
            if i is None:
                vinputs += [None]
                continue
            vinp = nn.Variable(i.shape, need_grad=b)
            vinp.grad.zero()  # grads are always non-accumulating
            vinputs += [vinp]
            vinputs[-1].data.cast(i.dtype)[...] = i
        return vinputs

    vinputs = create_variables(inputs, backward)
    vinputs_for_clear_buffer = create_variables(inputs, backward)
    vinputs_for_nn_grad = create_variables(inputs, backward)

    vinputs_identity = []
    vinputs_identity_for_clear_buffer = []
    vinputs_identity_for_nn_grad = []
    if not insert_identity:
        insert_identity = [True] * len(vinputs)

    for idx, i in enumerate(zip(vinputs, vinputs_for_clear_buffer,
                                vinputs_for_nn_grad)):
        with nn.auto_forward(auto_forward):
            i0, i1, i2 = i
            if i0 is None:
                vinputs_identity += [None]
                vinputs_identity_for_clear_buffer += [None]
                vinputs_identity_for_nn_grad += [None]
            elif insert_identity[idx]:
                vinputs_identity += [F.identity(i0)]
                vinputs_identity_for_clear_buffer += [F.identity(i1)]
                vinputs_identity_for_nn_grad += [F.identity(i2)]
            else:
                vinputs_identity += [i0]
                vinputs_identity_for_clear_buffer += [i1]
                vinputs_identity_for_nn_grad += [i2]

    # Forward and backward of the forward function with no buffer clear
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        outputs0 = func(*(vinputs_identity + func_args), **func_kwargs)
        outputs0 = force_list(outputs0)
        F.sink(*outputs0).forward(clear_no_need_grad=False)
    grad_voutputs = []
    for output in outputs0:
        ograd = rng.randn(*output.shape)
        grad_voutputs.append(
            nn.Variable.from_numpy_array(ograd).apply(need_grad=True))
        output.g = ograd
    F.sink(*outputs0, one_input_grad=False).backward()
    vinputs = list(filter(lambda x: x is not None, vinputs))
    vinputs_identity = list(filter(lambda x: x is not None, vinputs_identity))
    vinputs_for_clear_buffer = list(
        filter(lambda x: x is not None, vinputs_for_clear_buffer))
    grad_inputs0 = [inp.g.copy() for inp in vinputs]

    # Forward and backward of the forward function, clearing redundant buffers
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        outputs_for_clear_buffer = func(
            *(vinputs_identity_for_clear_buffer + func_args), **func_kwargs)
        outputs_for_clear_buffer = force_list(outputs_for_clear_buffer)
        outputs_for_clear_buffer = list(
            map(lambda x: F.identity(x) if x is not None else None,
                outputs_for_clear_buffer))
        F.sink(*outputs_for_clear_buffer).forward(clear_no_need_grad=True)
    for o, ref_o in zip(outputs_for_clear_buffer, outputs0):
        o.g = ref_o.g

    # Check backward
    F.sink(*outputs_for_clear_buffer,
           one_input_grad=False).backward(clear_buffer=True)
    grad_inputs_for_clear_buffer = [
        inp.g.copy() for inp in vinputs_for_clear_buffer]
    for grad_ref, grad_res in zip(grad_inputs0, grad_inputs_for_clear_buffer):
        if grad_ref is None or grad_res is None:
            continue
        assert_allclose(
            grad_ref, grad_res, atol=atol_f,
            err_msg="backward(clear_buffer=True) and "
                    "backward(clear_buffer=False) results differ.")

    # Forward of the backward function
    from nnabla.backward_functions import registry
    func_name = output.parent.info.type_name
    func_backward = registry[func_name]
    grad_vinputs = grad_voutputs + vinputs
    grad_vinputs_identity = grad_voutputs + vinputs_identity
    func_info_args = output.parent.info.args
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        ograds0 = func_backward(grad_vinputs_identity, **func_info_args)
        ograds0 = force_list(ograds0)
        ograds0_ = list(filter(lambda o: o is not None, ograds0))
        F.sink(*ograds0_).forward(clear_no_need_grad=True)
    outputs1 = []
    for i, ograd in enumerate(ograds0):
        if ograd is not None:
            outputs1.append(ograd.d.copy())
        else:
            outputs1.append(None)

    # Check the number of returned elements
    assert_allclose(
        len(vinputs), len(outputs1),
        err_msg="Length of the outputs ({}) does not match "
                "the length of the inputs ({}) to the backward function"
                .format(len(outputs1), len(vinputs)))

    # Check forward
    for i, elm in enumerate(zip(grad_inputs0, outputs1)):
        grad_ref, grad_res = elm
        if grad_ref is None or grad_res is None:
            continue
        assert_allclose(
            grad_ref, grad_res, atol=atol_f,
            err_msg="Forward of the backward function ({}) fails at "
                    "{}-th output.".format(func_backward.__name__, i))

    # Check that backward_function and nn.grad produce the same results
    vinputs = [v for b, v in zip(backward, vinputs) if b]
    vinputs = list(filter(lambda x: x is not None, vinputs))
    with nn.context_scope(ctx), nn.auto_forward(auto_forward):
        outputs0_for_nn_grad = func(
            *(vinputs_identity_for_nn_grad + func_args), **func_kwargs)
        outputs0_for_nn_grad = force_list(outputs0_for_nn_grad)
        vinputs_identity_for_nn_grad = [
            v for b, v in zip(backward, vinputs_identity_for_nn_grad) if b]
        vinputs_identity_for_nn_grad = list(
            filter(lambda x: x is not None, vinputs_identity_for_nn_grad))
        ograds1 = nn.grad(
            outputs0_for_nn_grad, vinputs_identity_for_nn_grad,
            grad_outputs=[g.d.copy() for g in grad_voutputs])
        F.sink(*ograds1).forward(clear_no_need_grad=True)
    ograds0 = list(filter(lambda o: o is not None, ograds0))
    ograds1 = list(filter(lambda o: o is not None, ograds1))
    for i in range(len(ograds0)):
        if ograds0[i].parent is None:
            continue
        assert_allclose(
            ograds0[i].d, ograds1[i].d, atol=atol_f,
            err_msg="nn.grad and backward_function results differ.")

    # Check backward
    # Needed since we sometimes use need_grad=False for optimization, e.g., mask.
    def set_inputs(inputs0, vinputs):
        begin = 0
        for i in vinputs:
            end = begin + i.size
            i.d = inputs0[begin:end].reshape(i.shape)
            begin = end

    def obj_func(inputs0, voutput, vinputs):
        set_inputs(inputs0, vinputs)
        voutput.forward()
        y = voutput.d.copy()
        return y

    initial_grads = []
    for grad_vinput in grad_vinputs:
        if grad_vinput is None:
            continue
        g = np.asarray(rng.randn(*grad_vinput.shape))
        initial_grads.append(g)
    grad_inputs1 = np.concatenate(
        [v.d.flatten() for v in grad_vinputs if v is not None])

    for i, ograd in enumerate(ograds0):
        # We can skip if the backward is a functions composite.
        # If the backward is a functions composite, the numerical difference
        # can deviate strongly from the analytical one for some functions.
        if skip_backward_check:
            continue
        if ograd is None or not backward[i]:
            continue
        for ig, v in zip(initial_grads, grad_vinputs):
            v.g = ig

        # Analytical grads. This must come first since approx_fprime
        # destroys the input values.
        rgrad = rng.randn()
        with nn.auto_forward(auto_forward):
            sum_ograd = F.sum(ograd) * rgrad
        sum_ograd.forward(clear_no_need_grad=True)
        sum_ograd.backward()
        analytical_grads = np.concatenate(
            [v.g.flatten() for v in grad_vinputs])
        analytical_grads -= np.concatenate(
            [g.flatten() for g in initial_grads])

        # Numerical grads
        from scipy.optimize import approx_fprime
        numerical_grads = approx_fprime(grad_inputs1, obj_func, dstep,
                                        sum_ograd, grad_vinputs)

        # grad_vinputs: dy_1, ..., dy_n, x_1, ..., x_n
        # grad_voutputs: dy_1, ..., dy_n
        seps = [0] + np.cumsum(
            [int(np.prod(v.shape)) for v in grad_vinputs]).tolist()
        ngrads = len(grad_voutputs)
        ninputs = len(grad_vinputs)
        backward_b = [True] * ninputs if backward_b is None else backward_b
        for k, sep in enumerate(zip(seps[:-1], seps[1:])):
            if k >= ngrads and not backward[k - ngrads] or not backward_b[k]:
                continue
            s0, s1 = sep
            analytical_grad = analytical_grads[s0:s1]
            numerical_grad = numerical_grads[s0:s1]
            assert_allclose(
                analytical_grad, numerical_grad, atol=atol_accum,
                err_msg="Backward (accum) of the backward function ({}) "
                        "wrt {}-th / {} input fails.".format(
                            func_backward.__name__, k, ninputs))

    # Some backward functions, like AffineDataGrad and AffineFilterGrad, do
    # not check non-accum anywhere, so check those non-accum backward methods
    # here.
    if non_accum_check:
        # For any output, the parent is the same function.
        parent = outputs0[0].parent
        inputs = parent.inputs
        # Accum
        initial_grads = np.concatenate(
            [inp.g.flatten() for inp, b in zip(inputs, backward) if b])
        accum = [True] * len(inputs)
        parent.backward(inputs, outputs0, accum=accum)
        accum_grads = np.concatenate(
            [inp.g.flatten() for inp, b in zip(inputs, backward) if b])
        non_accum_grads0 = accum_grads - initial_grads
        # Non-accum
        accum = [False] * len(inputs)
        parent.backward(inputs, outputs0, accum=accum)
        non_accum_grads1 = np.concatenate(
            [inp.g.flatten() for inp, b in zip(inputs, backward) if b])
        # Check
        assert_allclose(
            non_accum_grads0, non_accum_grads1, atol=atol_b,
            err_msg="Backward (non-accum) of the backward function ({}) "
                    "fails.".format(func_backward.__name__))
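# Usage sketch for `backward_function_tester` (hypothetical; the backward
# pass of `F.tanh` is assumed to have been validated by `function_tester`
# first, as the docstring requires):
#
#   rng = np.random.RandomState(313)
#   inputs = [rng.randn(2, 3).astype(np.float32)]
#   backward_function_tester(rng, F.tanh, inputs=inputs, ctx=nn.Context())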
def list_context(func_name):
    try:
        import list_context_ext
        return list_context_ext.list(func_name)
    except Exception:
        return [(nn.Context(), func_name)]